LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600ISelLowering.cpp
Test: llvm-toolchain.info
Date: 2018-07-13 00:08:38
Coverage: Lines: 865 / 941 hit (91.9 %)    Functions: 37 / 40 hit (92.5 %)

          Line data    Source code
       1             : //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Custom DAG lowering for R600
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "R600ISelLowering.h"
      16             : #include "AMDGPUFrameLowering.h"
      17             : #include "AMDGPUSubtarget.h"
      18             : #include "R600Defines.h"
      19             : #include "R600FrameLowering.h"
      20             : #include "R600InstrInfo.h"
      21             : #include "R600MachineFunctionInfo.h"
      22             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      23             : #include "Utils/AMDGPUBaseInfo.h"
      24             : #include "llvm/ADT/APFloat.h"
      25             : #include "llvm/ADT/APInt.h"
      26             : #include "llvm/ADT/ArrayRef.h"
      27             : #include "llvm/ADT/DenseMap.h"
      28             : #include "llvm/ADT/SmallVector.h"
      29             : #include "llvm/CodeGen/CallingConvLower.h"
      30             : #include "llvm/CodeGen/DAGCombine.h"
      31             : #include "llvm/CodeGen/ISDOpcodes.h"
      32             : #include "llvm/CodeGen/MachineBasicBlock.h"
      33             : #include "llvm/CodeGen/MachineFunction.h"
      34             : #include "llvm/CodeGen/MachineInstr.h"
      35             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      36             : #include "llvm/CodeGen/MachineMemOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/SelectionDAG.h"
      39             : #include "llvm/IR/Constants.h"
      40             : #include "llvm/IR/DerivedTypes.h"
      41             : #include "llvm/Support/Casting.h"
      42             : #include "llvm/Support/Compiler.h"
      43             : #include "llvm/Support/ErrorHandling.h"
      44             : #include "llvm/Support/MachineValueType.h"
      45             : #include <cassert>
      46             : #include <cstdint>
      47             : #include <iterator>
      48             : #include <utility>
      49             : #include <vector>
      50             : 
      51             : using namespace llvm;
      52             : 
      53           0 : static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
      54             :                             CCValAssign::LocInfo LocInfo,
      55             :                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
      56           0 :   MachineFunction &MF = State.getMachineFunction();
      57           0 :   AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
      58             : 
      59           0 :   uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
      60             :                                          ArgFlags.getOrigAlign());
      61           0 :   State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      62           0 :   return true;
      63             : }
      64             : 
      65             : #include "R600GenCallingConv.inc"
      66             : 
      67         286 : R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
      68         286 :                                        const R600Subtarget &STI)
      69         286 :     : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
      70             :   addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
      71             :   addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
      72             :   addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
      73             :   addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
      74             :   addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
      75             :   addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
      76             : 
      77         572 :   computeRegisterProperties(Subtarget->getRegisterInfo());
      78             : 
      79             :   // Legalize loads and stores to the private address space.
      80             :   setOperationAction(ISD::LOAD, MVT::i32, Custom);
      81             :   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
      82             :   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
      83             : 
      84             :   // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
      85             :   // spaces, so it is custom lowered to handle those where it isn't.
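                     :   // (An EXTLOAD leaves the high bits unspecified, so implementing it as a
                     :   //  zero-extending load is always valid.)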
      86        2002 :   for (MVT VT : MVT::integer_valuetypes()) {
      87             :     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      88             :     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
      89             :     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
      90             : 
      91             :     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      92             :     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
      93             :     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
      94             : 
      95             :     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
      96             :     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
      97             :     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
      98             :   }
      99             : 
     100             :   // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
     101             :   setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
     102             :   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
     103             :   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
     104             : 
     105             :   setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
     106             :   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
     107             :   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
     108             : 
     109             :   setOperationAction(ISD::STORE, MVT::i8, Custom);
     110             :   setOperationAction(ISD::STORE, MVT::i32, Custom);
     111             :   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
     112             :   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
     113             : 
     114             :   setTruncStoreAction(MVT::i32, MVT::i8, Custom);
     115             :   setTruncStoreAction(MVT::i32, MVT::i16, Custom);
     116             :   // We need to include these since trunc STORES to PRIVATE need
      117             :   // special handling to accommodate RMW.
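                      :   // (read-modify-write: storing less than a dword to private memory requires
                      :   //  loading the containing dword, merging in the new bits, and storing it back.)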
     118             :   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
     119             :   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
     120             :   setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
     121             :   setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
     122             :   setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
     123             :   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
     124             :   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
     125             :   setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
     126             :   setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
     127             :   setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
     128             : 
     129             :   // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
     130             :   setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
     131             :   setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
     132             : 
     133             :   // Set condition code actions
     134             :   setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
     135             :   setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);
     136             :   setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
     137             :   setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
     138             :   setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
     139             :   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
     140             :   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
     141             :   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
     142             :   setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
     143             :   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
     144             :   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
     145             :   setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
     146             : 
     147             :   setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
     148             :   setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
     149             :   setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
     150             :   setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
     151             : 
     152             :   setOperationAction(ISD::FCOS, MVT::f32, Custom);
     153             :   setOperationAction(ISD::FSIN, MVT::f32, Custom);
     154             : 
     155             :   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
     156             :   setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
     157             : 
     158             :   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
     159             :   setOperationAction(ISD::BR_CC, MVT::f32, Expand);
     160             :   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
     161             : 
     162             :   setOperationAction(ISD::FSUB, MVT::f32, Expand);
     163             : 
     164             :   setOperationAction(ISD::FCEIL, MVT::f64, Custom);
     165             :   setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
     166             :   setOperationAction(ISD::FRINT, MVT::f64, Custom);
     167             :   setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
     168             : 
     169             :   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
     170             :   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
     171             : 
     172             :   setOperationAction(ISD::SETCC, MVT::i32, Expand);
     173             :   setOperationAction(ISD::SETCC, MVT::f32, Expand);
     174             :   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
     175             :   setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
     176             :   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
     177             :   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
     178             : 
     179             :   setOperationAction(ISD::SELECT, MVT::i32, Expand);
     180             :   setOperationAction(ISD::SELECT, MVT::f32, Expand);
     181             :   setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
     182             :   setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
     183             : 
     184             :   // ADD, SUB overflow.
     185             :   // TODO: turn these into Legal?
     186         572 :   if (Subtarget->hasCARRY())
     187             :     setOperationAction(ISD::UADDO, MVT::i32, Custom);
     188             : 
     189         286 :   if (Subtarget->hasBORROW())
     190             :     setOperationAction(ISD::USUBO, MVT::i32, Custom);
     191             : 
     192             :   // Expand sign extension of vectors
     193         286 :   if (!Subtarget->hasBFE())
     194             :     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
     195             : 
     196             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
     197             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
     198             : 
     199         286 :   if (!Subtarget->hasBFE())
     200             :     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
     201             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
     202             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
     203             : 
     204         286 :   if (!Subtarget->hasBFE())
     205             :     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
     206             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
     207             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
     208             : 
     209             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
     210             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
     211             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
     212             : 
     213             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
     214             : 
     215             :   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
     216             : 
     217             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
     218             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
     219             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
     220             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
     221             : 
     222             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
     223             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
     224             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
     225             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
     226             : 
     227             :   // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
     228             :   //  to be Legal/Custom in order to avoid library calls.
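                      :   // (The *_PARTS nodes shift a 64-bit value represented as two i32 halves,
                      :   //  so only 32-bit shift hardware is needed.)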
     229             :   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
     230             :   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
     231             :   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
     232             : 
     233         286 :   if (!Subtarget->hasFMA()) {
     234             :     setOperationAction(ISD::FMA, MVT::f32, Expand);
     235             :     setOperationAction(ISD::FMA, MVT::f64, Expand);
     236             :   }
     237             :  
     238             :   // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
     239             :   // need it for R600.
     240         286 :   if (!Subtarget->hasFP32Denormals())
     241             :     setOperationAction(ISD::FMAD, MVT::f32, Legal);
     242             : 
     243         286 :   if (!Subtarget->hasBFI()) {
     244             :     // fcopysign can be done in a single instruction with BFI.
     245             :     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
     246             :     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
     247             :   }
     248             : 
     249         286 :   if (!Subtarget->hasBCNT(32))
     250             :     setOperationAction(ISD::CTPOP, MVT::i32, Expand);
     251             : 
     252             :   if (!Subtarget->hasBCNT(64))
     253             :     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
     254             : 
     255         286 :   if (Subtarget->hasFFBH())
     256             :     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
     257             : 
     258         286 :   if (Subtarget->hasFFBL())
     259             :     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
     260             : 
     261             :   // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
     262             :   // need it for R600.
     263         286 :   if (Subtarget->hasBFE())
     264             :     setHasExtractBitsInsn(true);
     265             : 
     266             :   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
     267             : 
     268         286 :   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
     269        1430 :   for (MVT VT : ScalarIntVTs) {
     270             :     setOperationAction(ISD::ADDC, VT, Expand);
     271             :     setOperationAction(ISD::SUBC, VT, Expand);
     272             :     setOperationAction(ISD::ADDE, VT, Expand);
     273             :     setOperationAction(ISD::SUBE, VT, Expand);
     274             :   }
     275             : 
     276             :   // LLVM will expand these to atomic_cmp_swap(0)
     277             :   // and atomic_swap, respectively.
     278             :   setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
     279             :   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
     280             : 
     281             :   // We need to custom lower some of the intrinsics
     282             :   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
     283             :   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
     284             : 
     285             :   setSchedulingPreference(Sched::Source);
     286             : 
     287             :   setTargetDAGCombine(ISD::FP_ROUND);
     288             :   setTargetDAGCombine(ISD::FP_TO_SINT);
     289             :   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
     290             :   setTargetDAGCombine(ISD::SELECT_CC);
     291             :   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
     292             :   setTargetDAGCombine(ISD::LOAD);
     293         286 : }
     294             : 
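                      : // Returns true when the instruction following I is the block's RETURN, i.e. I
                      : // is the last real instruction before the end of the program. Used below to
                      : // decide whether to set the "End of program" bit on exports and RAT stores.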
     295        2452 : static inline bool isEOP(MachineBasicBlock::iterator I) {
     296        4904 :   if (std::next(I) == I->getParent()->end())
     297             :     return false;
     298        4888 :   return std::next(I)->getOpcode() == R600::RETURN;
     299             : }
     300             : 
     301             : MachineBasicBlock *
     302        9111 : R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     303             :                                                 MachineBasicBlock *BB) const {
     304        9111 :   MachineFunction *MF = BB->getParent();
     305        9111 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     306             :   MachineBasicBlock::iterator I = MI;
     307        9111 :   const R600InstrInfo *TII = Subtarget->getInstrInfo();
     308             : 
     309       18222 :   switch (MI.getOpcode()) {
     310         880 :   default:
      311             :     // Replace LDS_*_RET instructions that don't have any uses with the
     312             :     // equivalent LDS_*_NORET instruction.
     313         880 :     if (TII->isLDSRetInstr(MI.getOpcode())) {
     314        1760 :       int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
     315             :       assert(DstIdx != -1);
     316             :       MachineInstrBuilder NewMI;
     317             :       // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
     318             :       //        LDS_1A2D support and remove this special case.
     319        2670 :       if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
     320          30 :           MI.getOpcode() == R600::LDS_CMPST_RET)
     321             :         return BB;
     322             : 
     323          60 :       NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
     324          30 :                       TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
     325         300 :       for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
     326         270 :         NewMI.add(MI.getOperand(i));
     327             :       }
     328             :     } else {
     329           0 :       return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
     330             :     }
     331             :     break;
     332             : 
     333          20 :   case R600::FABS_R600: {
     334          40 :     MachineInstr *NewMI = TII->buildDefaultInstruction(
     335             :         *BB, I, R600::MOV, MI.getOperand(0).getReg(),
     336          40 :         MI.getOperand(1).getReg());
     337          20 :     TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
     338          20 :     break;
     339             :   }
     340             : 
     341          20 :   case R600::FNEG_R600: {
     342          40 :     MachineInstr *NewMI = TII->buildDefaultInstruction(
     343             :         *BB, I, R600::MOV, MI.getOperand(0).getReg(),
     344          40 :         MI.getOperand(1).getReg());
     345          20 :     TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
     346          20 :     break;
     347             :   }
     348             : 
     349           0 :   case R600::MASK_WRITE: {
     350           0 :     unsigned maskedRegister = MI.getOperand(0).getReg();
     351             :     assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
     352           0 :     MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
     353           0 :     TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
     354           0 :     break;
     355             :   }
     356             : 
     357          17 :   case R600::MOV_IMM_F32:
     358          34 :     TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
     359             :                                                             .getFPImm()
     360          17 :                                                             ->getValueAPF()
     361          34 :                                                             .bitcastToAPInt()
     362             :                                                             .getZExtValue());
     363          17 :     break;
     364             : 
     365         523 :   case R600::MOV_IMM_I32:
     366         523 :     TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
     367         523 :                      MI.getOperand(1).getImm());
     368         523 :     break;
     369             : 
     370           1 :   case R600::MOV_IMM_GLOBAL_ADDR: {
     371             :     //TODO: Perhaps combine this instruction with the next if possible
     372             :     auto MIB = TII->buildDefaultInstruction(
     373           2 :         *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
     374           1 :     int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
     375             :     //TODO: Ugh this is rather ugly
     376           2 :     MIB->getOperand(Idx) = MI.getOperand(1);
     377             :     break;
     378             :   }
     379             : 
     380        2741 :   case R600::CONST_COPY: {
     381        5482 :     MachineInstr *NewMI = TII->buildDefaultInstruction(
     382        8223 :         *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
     383        2741 :     TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
     384        2741 :                        MI.getOperand(1).getImm());
     385        2741 :     break;
     386             :   }
     387             : 
     388        2390 :   case R600::RAT_WRITE_CACHELESS_32_eg:
     389             :   case R600::RAT_WRITE_CACHELESS_64_eg:
     390             :   case R600::RAT_WRITE_CACHELESS_128_eg:
     391        7170 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
     392        2390 :         .add(MI.getOperand(0))
     393        2390 :         .add(MI.getOperand(1))
     394        2390 :         .addImm(isEOP(I)); // Set End of program bit
     395        2390 :     break;
     396             : 
     397           2 :   case R600::RAT_STORE_TYPED_eg:
     398           6 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
     399           2 :         .add(MI.getOperand(0))
     400           2 :         .add(MI.getOperand(1))
     401           2 :         .add(MI.getOperand(2))
     402           2 :         .addImm(isEOP(I)); // Set End of program bit
     403           2 :     break;
     404             : 
     405         135 :   case R600::BRANCH:
     406         540 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
     407         135 :         .add(MI.getOperand(0));
     408         135 :     break;
     409             : 
     410           0 :   case R600::BRANCH_COND_f32: {
     411             :     MachineInstr *NewMI =
     412           0 :         BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
     413           0 :                 R600::PREDICATE_BIT)
     414           0 :             .add(MI.getOperand(1))
     415             :             .addImm(R600::PRED_SETNE)
     416             :             .addImm(0); // Flags
     417           0 :     TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
     418           0 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
     419           0 :         .add(MI.getOperand(0))
     420           0 :         .addReg(R600::PREDICATE_BIT, RegState::Kill);
     421           0 :     break;
     422             :   }
     423             : 
     424          86 :   case R600::BRANCH_COND_i32: {
     425             :     MachineInstr *NewMI =
     426         172 :         BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
     427          86 :                 R600::PREDICATE_BIT)
     428          86 :             .add(MI.getOperand(1))
     429             :             .addImm(R600::PRED_SETNE_INT)
     430             :             .addImm(0); // Flags
     431          86 :     TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
     432         344 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
     433          86 :         .add(MI.getOperand(0))
     434          86 :         .addReg(R600::PREDICATE_BIT, RegState::Kill);
     435          86 :     break;
     436             :   }
     437             : 
     438          60 :   case R600::EG_ExportSwz:
     439             :   case R600::R600_ExportSwz: {
      440             :     // Instruction is left unmodified if it's not the last one of its type
     441             :     bool isLastInstructionOfItsType = true;
     442          60 :     unsigned InstExportType = MI.getOperand(1).getImm();
     443         129 :     for (MachineBasicBlock::iterator NextExportInst = std::next(I),
     444         189 :          EndBlock = BB->end(); NextExportInst != EndBlock;
     445             :          NextExportInst = std::next(NextExportInst)) {
     446         276 :       if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
     447             :           NextExportInst->getOpcode() == R600::R600_ExportSwz) {
     448          23 :         unsigned CurrentInstExportType = NextExportInst->getOperand(1)
     449          23 :             .getImm();
     450          23 :         if (CurrentInstExportType == InstExportType) {
     451             :           isLastInstructionOfItsType = false;
     452             :           break;
     453             :         }
     454             :       }
     455             :     }
     456          60 :     bool EOP = isEOP(I);
     457          60 :     if (!EOP && !isLastInstructionOfItsType)
     458             :       return BB;
     459         102 :     unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
     460         153 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
     461          51 :         .add(MI.getOperand(0))
     462          51 :         .add(MI.getOperand(1))
     463          51 :         .add(MI.getOperand(2))
     464          51 :         .add(MI.getOperand(3))
     465          51 :         .add(MI.getOperand(4))
     466          51 :         .add(MI.getOperand(5))
     467          51 :         .add(MI.getOperand(6))
     468          51 :         .addImm(CfInst)
     469          51 :         .addImm(EOP);
     470          51 :     break;
     471             :   }
     472             :   case R600::RETURN: {
     473             :     return BB;
     474             :   }
     475             :   }
     476             : 
     477        6016 :   MI.eraseFromParent();
     478        6016 :   return BB;
     479             : }
     480             : 
     481             : //===----------------------------------------------------------------------===//
     482             : // Custom DAG Lowering Operations
     483             : //===----------------------------------------------------------------------===//
     484             : 
     485      103678 : SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     486      103678 :   MachineFunction &MF = DAG.getMachineFunction();
     487      103678 :   R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
     488      103678 :   switch (Op.getOpcode()) {
     489         450 :   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
     490       11048 :   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
     491           7 :   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
     492          50 :   case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
     493          28 :   case ISD::SRA_PARTS:
     494          28 :   case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
     495          64 :   case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
     496         620 :   case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
     497          17 :   case ISD::FCOS:
     498          17 :   case ISD::FSIN: return LowerTrig(Op, DAG);
     499       16373 :   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
     500       33370 :   case ISD::STORE: return LowerSTORE(Op, DAG);
     501       39152 :   case ISD::LOAD: {
     502       39152 :     SDValue Result = LowerLOAD(Op, DAG);
     503             :     assert((!Result.getNode() ||
     504             :             Result.getNode()->getNumValues() == 2) &&
     505             :            "Load should return a value and a chain");
     506       39152 :     return Result;
     507             :   }
     508             : 
     509          86 :   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
     510          57 :   case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
     511        1606 :   case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
     512          82 :   case ISD::INTRINSIC_VOID: {
     513          82 :     SDValue Chain = Op.getOperand(0);
     514             :     unsigned IntrinsicID =
     515         164 :                          cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
     516          82 :     switch (IntrinsicID) {
     517          60 :     case Intrinsic::r600_store_swizzle: {
     518             :       SDLoc DL(Op);
     519             :       const SDValue Args[8] = {
     520             :         Chain,
     521             :         Op.getOperand(2), // Export Value
     522             :         Op.getOperand(3), // ArrayBase
     523             :         Op.getOperand(4), // Type
     524          60 :         DAG.getConstant(0, DL, MVT::i32), // SWZ_X
     525          60 :         DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
     526          60 :         DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
     527          60 :         DAG.getConstant(3, DL, MVT::i32) // SWZ_W
     528         360 :       };
     529          60 :       return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
     530             :     }
     531             : 
     532             :     // default for switch(IntrinsicID)
     533             :     default: break;
     534             :     }
     535             :     // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
     536             :     break;
     537             :   }
     538         668 :   case ISD::INTRINSIC_WO_CHAIN: {
     539             :     unsigned IntrinsicID =
     540        1336 :                          cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
     541         668 :     EVT VT = Op.getValueType();
     542             :     SDLoc DL(Op);
     543         668 :     switch (IntrinsicID) {
     544         276 :     case Intrinsic::r600_tex:
     545             :     case Intrinsic::r600_texc: {
     546             :       unsigned TextureOp;
     547         276 :       switch (IntrinsicID) {
     548             :       case Intrinsic::r600_tex:
     549             :         TextureOp = 0;
     550             :         break;
     551           7 :       case Intrinsic::r600_texc:
     552             :         TextureOp = 1;
     553           7 :         break;
     554           0 :       default:
     555           0 :         llvm_unreachable("unhandled texture operation");
     556             :       }
     557             : 
     558             :       SDValue TexArgs[19] = {
     559         552 :         DAG.getConstant(TextureOp, DL, MVT::i32),
     560             :         Op.getOperand(1),
     561         276 :         DAG.getConstant(0, DL, MVT::i32),
     562         276 :         DAG.getConstant(1, DL, MVT::i32),
     563         276 :         DAG.getConstant(2, DL, MVT::i32),
     564         276 :         DAG.getConstant(3, DL, MVT::i32),
     565             :         Op.getOperand(2),
     566             :         Op.getOperand(3),
     567             :         Op.getOperand(4),
     568         276 :         DAG.getConstant(0, DL, MVT::i32),
     569         276 :         DAG.getConstant(1, DL, MVT::i32),
     570         276 :         DAG.getConstant(2, DL, MVT::i32),
     571         276 :         DAG.getConstant(3, DL, MVT::i32),
     572             :         Op.getOperand(5),
     573             :         Op.getOperand(6),
     574             :         Op.getOperand(7),
     575             :         Op.getOperand(8),
     576             :         Op.getOperand(9),
     577             :         Op.getOperand(10)
     578        3312 :       };
     579         276 :       return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
     580             :     }
     581          32 :     case Intrinsic::r600_dot4: {
     582             :       SDValue Args[8] = {
     583             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     584          64 :           DAG.getConstant(0, DL, MVT::i32)),
     585             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     586          64 :           DAG.getConstant(0, DL, MVT::i32)),
     587             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     588          64 :           DAG.getConstant(1, DL, MVT::i32)),
     589             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     590          64 :           DAG.getConstant(1, DL, MVT::i32)),
     591             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     592          64 :           DAG.getConstant(2, DL, MVT::i32)),
     593             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     594          64 :           DAG.getConstant(2, DL, MVT::i32)),
     595             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     596          64 :           DAG.getConstant(3, DL, MVT::i32)),
     597             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     598          64 :           DAG.getConstant(3, DL, MVT::i32))
     599         256 :       };
     600          32 :       return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
     601             :     }
     602             : 
     603           2 :     case Intrinsic::r600_implicitarg_ptr: {
     604           2 :       MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
     605           2 :       uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
     606           2 :       return DAG.getConstant(ByteOffset, DL, PtrVT);
     607             :     }
     608           1 :     case Intrinsic::r600_read_ngroups_x:
     609           1 :       return LowerImplicitParameter(DAG, VT, DL, 0);
     610           1 :     case Intrinsic::r600_read_ngroups_y:
     611           1 :       return LowerImplicitParameter(DAG, VT, DL, 1);
     612           1 :     case Intrinsic::r600_read_ngroups_z:
     613           1 :       return LowerImplicitParameter(DAG, VT, DL, 2);
     614           2 :     case Intrinsic::r600_read_global_size_x:
     615           2 :       return LowerImplicitParameter(DAG, VT, DL, 3);
     616           2 :     case Intrinsic::r600_read_global_size_y:
     617           2 :       return LowerImplicitParameter(DAG, VT, DL, 4);
     618           2 :     case Intrinsic::r600_read_global_size_z:
     619           2 :       return LowerImplicitParameter(DAG, VT, DL, 5);
     620           8 :     case Intrinsic::r600_read_local_size_x:
     621           8 :       return LowerImplicitParameter(DAG, VT, DL, 6);
     622          36 :     case Intrinsic::r600_read_local_size_y:
     623          36 :       return LowerImplicitParameter(DAG, VT, DL, 7);
     624          36 :     case Intrinsic::r600_read_local_size_z:
     625          36 :       return LowerImplicitParameter(DAG, VT, DL, 8);
     626             : 
     627           4 :     case Intrinsic::r600_read_tgid_x:
     628             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     629           4 :                                      R600::T1_X, VT);
     630           3 :     case Intrinsic::r600_read_tgid_y:
     631             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     632           3 :                                      R600::T1_Y, VT);
     633           3 :     case Intrinsic::r600_read_tgid_z:
     634             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     635           3 :                                      R600::T1_Z, VT);
     636         183 :     case Intrinsic::r600_read_tidig_x:
     637             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     638         183 :                                      R600::T0_X, VT);
     639          32 :     case Intrinsic::r600_read_tidig_y:
     640             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     641          32 :                                      R600::T0_Y, VT);
     642          32 :     case Intrinsic::r600_read_tidig_z:
     643             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     644          32 :                                      R600::T0_Z, VT);
     645             : 
     646             :     case Intrinsic::r600_recipsqrt_ieee:
     647           3 :       return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
     648             : 
     649             :     case Intrinsic::r600_recipsqrt_clamped:
     650           5 :       return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
     651           4 :     default:
     652           4 :       return Op;
     653             :     }
     654             : 
     655             :     // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
     656             :     break;
     657             :   }
     658             :   } // end switch(Op.getOpcode())
     659          22 :   return SDValue();
     660             : }
     661             : 
     662         105 : void R600TargetLowering::ReplaceNodeResults(SDNode *N,
     663             :                                             SmallVectorImpl<SDValue> &Results,
     664             :                                             SelectionDAG &DAG) const {
     665         210 :   switch (N->getOpcode()) {
     666          53 :   default:
     667          53 :     AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
     668          53 :     return;
     669             :   case ISD::FP_TO_UINT:
     670           9 :     if (N->getValueType(0) == MVT::i1) {
     671           4 :       Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
     672           2 :       return;
     673             :     }
     674             :     // Since we don't care about out of bounds values we can use FP_TO_SINT for
     675             :     // uints too. The DAGLegalizer code for uint considers some extra cases
     676             :     // which are not necessary here.
     677             :     LLVM_FALLTHROUGH;
     678             :   case ISD::FP_TO_SINT: {
     679          16 :     if (N->getValueType(0) == MVT::i1) {
     680           4 :       Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
     681           2 :       return;
     682             :     }
     683             : 
     684          14 :     SDValue Result;
     685          14 :     if (expandFP_TO_SINT(N, Result, DAG))
     686          14 :       Results.push_back(Result);
     687             :     return;
     688             :   }
     689             :   case ISD::SDIVREM: {
     690             :     SDValue Op = SDValue(N, 1);
     691          12 :     SDValue RES = LowerSDIVREM(Op, DAG);
     692          12 :     Results.push_back(RES);
     693          12 :     Results.push_back(RES.getValue(1));
     694             :     break;
     695             :   }
     696             :   case ISD::UDIVREM: {
     697             :     SDValue Op = SDValue(N, 0);
     698          22 :     LowerUDIVREM64(Op, DAG, Results);
     699             :     break;
     700             :   }
     701             :   }
     702             : }
     703             : 
     704          16 : SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
     705             :                                                    SDValue Vector) const {
     706             :   SDLoc DL(Vector);
     707          16 :   EVT VecVT = Vector.getValueType();
     708          16 :   EVT EltVT = VecVT.getVectorElementType();
     709             :   SmallVector<SDValue, 8> Args;
     710             : 
     711          64 :   for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
     712          48 :     Args.push_back(DAG.getNode(
     713             :         ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
     714         192 :         DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
     715             :   }
     716             : 
     717          32 :   return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
     718             : }
     719             : 
     720       11048 : SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     721             :                                                     SelectionDAG &DAG) const {
     722             :   SDLoc DL(Op);
     723       11048 :   SDValue Vector = Op.getOperand(0);
     724       11048 :   SDValue Index = Op.getOperand(1);
     725             : 
     726          42 :   if (isa<ConstantSDNode>(Index) ||
     727             :       Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
     728       11034 :     return Op;
     729             : 
     730          14 :   Vector = vectorToVerticalVector(DAG, Vector);
     731             :   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
     732          14 :                      Vector, Index);
     733             : }
     734             : 
     735           7 : SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     736             :                                                    SelectionDAG &DAG) const {
     737             :   SDLoc DL(Op);
     738           7 :   SDValue Vector = Op.getOperand(0);
     739           7 :   SDValue Value = Op.getOperand(1);
     740           7 :   SDValue Index = Op.getOperand(2);
     741             : 
     742           3 :   if (isa<ConstantSDNode>(Index) ||
     743             :       Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
     744           6 :     return Op;
     745             : 
     746           1 :   Vector = vectorToVerticalVector(DAG, Vector);
     747             :   SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
     748           1 :                                Vector, Value, Index);
     749           1 :   return vectorToVerticalVector(DAG, Insert);
     750             : }
     751             : 
     752          57 : SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
     753             :                                                SDValue Op,
     754             :                                                SelectionDAG &DAG) const {
     755             :   GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
     756          57 :   if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
     757          42 :     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
     758             : 
     759          15 :   const DataLayout &DL = DAG.getDataLayout();
     760          15 :   const GlobalValue *GV = GSD->getGlobal();
     761             :   MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
     762             : 
     763          30 :   SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
     764          30 :   return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
     765             : }
     766             : 
     767          17 : SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
     768             :   // On hw >= R700, COS/SIN input must be between -1. and 1.
     769             :   // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
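                      :   // (The constant 0.15915494309 below is 1/(2*pi).)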
     770          17 :   EVT VT = Op.getValueType();
     771          17 :   SDValue Arg = Op.getOperand(0);
     772             :   SDLoc DL(Op);
     773             : 
     774             :   // TODO: Should this propagate fast-math-flags?
     775             :   SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
     776             :       DAG.getNode(ISD::FADD, DL, VT,
     777             :         DAG.getNode(ISD::FMUL, DL, VT, Arg,
     778             :           DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
     779          34 :         DAG.getConstantFP(0.5, DL, MVT::f32)));
     780             :   unsigned TrigNode;
     781          17 :   switch (Op.getOpcode()) {
     782             :   case ISD::FCOS:
     783             :     TrigNode = AMDGPUISD::COS_HW;
     784             :     break;
     785          11 :   case ISD::FSIN:
     786             :     TrigNode = AMDGPUISD::SIN_HW;
     787          11 :     break;
     788           0 :   default:
     789           0 :     llvm_unreachable("Wrong trig opcode");
     790             :   }
     791             :   SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
     792             :       DAG.getNode(ISD::FADD, DL, VT, FractPart,
     793          17 :         DAG.getConstantFP(-0.5, DL, MVT::f32)));
     794          17 :   if (Gen >= R600Subtarget::R700)
     795          17 :     return TrigVal;
     796             :   // On R600 hw, COS/SIN input must be between -Pi and Pi.
     797             :   return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
     798           0 :       DAG.getConstantFP(3.14159265359, DL, MVT::f32));
     799             : }
     800             : 
     801          50 : SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
     802             :   SDLoc DL(Op);
     803          50 :   EVT VT = Op.getValueType();
     804             : 
     805          50 :   SDValue Lo = Op.getOperand(0);
     806          50 :   SDValue Hi = Op.getOperand(1);
     807          50 :   SDValue Shift = Op.getOperand(2);
     808          50 :   SDValue Zero = DAG.getConstant(0, DL, VT);
     809          50 :   SDValue One  = DAG.getConstant(1, DL, VT);
     810             : 
     811          50 :   SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
     812          50 :   SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
     813          50 :   SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
     814          50 :   SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
     815             : 
      816             :   // The dance around Width1 is necessary for the Shift == 0 special case.
      817             :   // Without it, CompShift might be 32, producing an incorrect result in
      818             :   // Overflow. So we do the shift in two steps; the alternative would be to
      819             :   // add a conditional to filter out the special case.
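                      :   // E.g. for Shift == 0: CompShift == 31, so Overflow == (Lo >> 31) >> 1 == 0,
                      :   //  whereas a single i32 shift by 32 is not guaranteed to produce 0.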
     820             : 
     821          50 :   SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
     822          50 :   Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
     823             : 
     824          50 :   SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
     825          50 :   HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
     826          50 :   SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
     827             : 
     828          50 :   SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
     829          50 :   SDValue LoBig = Zero;
     830             : 
     831          50 :   Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
     832          50 :   Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
     833             : 
     834         100 :   return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
     835             : }
     836             : 
     837          28 : SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
     838             :   SDLoc DL(Op);
     839          28 :   EVT VT = Op.getValueType();
     840             : 
     841          28 :   SDValue Lo = Op.getOperand(0);
     842          28 :   SDValue Hi = Op.getOperand(1);
     843          28 :   SDValue Shift = Op.getOperand(2);
     844          28 :   SDValue Zero = DAG.getConstant(0, DL, VT);
     845          28 :   SDValue One  = DAG.getConstant(1, DL, VT);
     846             : 
     847             :   const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
     848             : 
     849          28 :   SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
     850          28 :   SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
     851          28 :   SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
     852          28 :   SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
     853             : 
      854             :   // The dance around Width1 is necessary for the Shift == 0 special case.
      855             :   // Without it, CompShift might be 32, producing an incorrect result in
      856             :   // Overflow. So we do the shift in two steps; the alternative would be to
      857             :   // add a conditional to filter out the special case.
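                      :   // E.g. for Shift == 0: Overflow == (Hi << 31) << 1 == 0, again avoiding an
                      :   //  i32 shift by 32.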
     858             : 
     859          28 :   SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
     860          28 :   Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
     861             : 
     862          28 :   SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
     863          28 :   SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
     864          28 :   LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
     865             : 
     866          28 :   SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
     867          35 :   SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
     868             : 
     869          28 :   Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
     870          28 :   Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
     871             : 
     872          56 :   return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
     873             : }
     874             : 
     875         684 : SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
     876             :                                           unsigned mainop, unsigned ovf) const {
     877             :   SDLoc DL(Op);
     878         684 :   EVT VT = Op.getValueType();
     879             : 
     880         684 :   SDValue Lo = Op.getOperand(0);
     881         684 :   SDValue Hi = Op.getOperand(1);
     882             : 
     883         684 :   SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
     884             :   // Extend sign.
     885         684 :   OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
     886        1368 :                     DAG.getValueType(MVT::i1));
     887             : 
     888         684 :   SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
     889             : 
     890        1368 :   return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
     891             : }
     892             : 
     893           2 : SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
     894             :   SDLoc DL(Op);
     895             :   return DAG.getNode(
     896             :       ISD::SETCC,
     897             :       DL,
     898             :       MVT::i1,
     899             :       Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
     900           8 :       DAG.getCondCode(ISD::SETEQ));
     901             : }
     902             : 
     903           2 : SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
     904             :   SDLoc DL(Op);
     905             :   return DAG.getNode(
     906             :       ISD::SETCC,
     907             :       DL,
     908             :       MVT::i1,
     909             :       Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
     910           8 :       DAG.getCondCode(ISD::SETEQ));
     911             : }
     912             : 
     913          89 : SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
     914             :                                                    const SDLoc &DL,
     915             :                                                    unsigned DwordOffset) const {
     916          89 :   unsigned ByteOffset = DwordOffset * 4;
     917          89 :   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
     918          89 :                                       AMDGPUASI.CONSTANT_BUFFER_0);
     919             : 
      920             :   // We shouldn't be using an offset wider than 16 bits for implicit parameters.
     921             :   assert(isInt<16>(ByteOffset));
     922             : 
     923             :   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
     924             :                      DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
     925         267 :                      MachinePointerInfo(ConstantPointerNull::get(PtrType)));
     926             : }
     927             : 
     928       21274 : bool R600TargetLowering::isZero(SDValue Op) const {
     929             :   if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
     930        9779 :     return Cst->isNullValue();
     931             :   } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
     932         440 :     return CstFP->isZero();
     933             :   } else {
     934             :     return false;
     935             :   }
     936             : }
     937             : 
     938       32716 : bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
     939             :   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
     940         936 :     return CFP->isExactlyValue(1.0);
     941             :   }
     942       32248 :   return isAllOnesConstant(Op);
     943             : }
     944             : 
     945        5900 : bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
     946             :   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
     947         310 :     return CFP->getValueAPF().isZero();
     948             :   }
     949        5745 :   return isNullConstant(Op);
     950             : }
     951             : 
     952       16373 : SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     953             :   SDLoc DL(Op);
     954       16373 :   EVT VT = Op.getValueType();
     955             : 
     956       16373 :   SDValue LHS = Op.getOperand(0);
     957       16373 :   SDValue RHS = Op.getOperand(1);
     958       16373 :   SDValue True = Op.getOperand(2);
     959       16373 :   SDValue False = Op.getOperand(3);
     960       16373 :   SDValue CC = Op.getOperand(4);
     961             :   SDValue Temp;
     962             : 
     963             :   if (VT == MVT::f32) {
     964             :     DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
     965         379 :     SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
     966         379 :     if (MinMax)
     967          19 :       return MinMax;
     968             :   }
     969             : 
     970             :   // LHS and RHS are guaranteed to be the same value type
     971       16354 :   EVT CompareVT = LHS.getValueType();
     972             : 
     973             :   // Check if we can lower this to a native operation.
     974             : 
     975             :   // Try to lower to a SET* instruction:
     976             :   //
     977             :   // SET* can match the following patterns:
     978             :   //
     979             :   // select_cc f32, f32, -1,  0, cc_supported
     980             :   // select_cc f32, f32, 1.0f, 0.0f, cc_supported
     981             :   // select_cc i32, i32, -1,  0, cc_supported
     982             :   //
     983             : 
     984             :   // Move hardware True/False values to the correct operand.
     985       16354 :   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
     986             :   ISD::CondCode InverseCC =
     987       16354 :      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
     988       16354 :   if (isHWTrueValue(False) && isHWFalseValue(True)) {
     989          82 :     if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
     990             :       std::swap(False, True);
     991          11 :       CC = DAG.getCondCode(InverseCC);
     992             :     } else {
     993          71 :       ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
     994          71 :       if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
     995             :         std::swap(False, True);
     996             :         std::swap(LHS, RHS);
     997          60 :         CC = DAG.getCondCode(SwapInvCC);
     998             :       }
     999             :     }
    1000             :   }
    1001             : 
    1002       16354 :   if (isHWTrueValue(True) && isHWFalseValue(False) &&
    1003             :       (CompareVT == VT || VT == MVT::i32)) {
    1004             :     // This can be matched by a SET* instruction.
    1005        5717 :     return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    1006             :   }
    1007             : 
    1008             :   // Try to lower to a CND* instruction:
    1009             :   //
    1010             :   // CND* can match the following patterns:
    1011             :   //
    1012             :   // select_cc f32, 0.0, f32, f32, cc_supported
    1013             :   // select_cc f32, 0.0, i32, i32, cc_supported
    1014             :   // select_cc i32, 0,   f32, f32, cc_supported
    1015             :   // select_cc i32, 0,   i32, i32, cc_supported
    1016             :   //
    1017             : 
    1018             :   // Try to move the zero value to the RHS
    1019       10637 :   if (isZero(LHS)) {
    1020           3 :     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    1021             :     // Try swapping the operands
    1022           3 :     ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    1023           3 :     if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
    1024             :       std::swap(LHS, RHS);
    1025           0 :       CC = DAG.getCondCode(CCSwapped);
    1026             :     } else {
     1027             :       // Try inverting the condition and then swapping the operands
    1028           3 :       ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
    1029           3 :       CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
    1030           3 :       if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
    1031             :         std::swap(True, False);
    1032             :         std::swap(LHS, RHS);
    1033           0 :         CC = DAG.getCondCode(CCSwapped);
    1034             :       }
    1035             :     }
    1036             :   }
    1037       10637 :   if (isZero(RHS)) {
    1038        9759 :     SDValue Cond = LHS;
    1039        9759 :     SDValue Zero = RHS;
    1040        9759 :     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    1041        9759 :     if (CompareVT != VT) {
    1042             :       // Bitcast True / False to the correct types.  This will end up being
    1043             :       // a nop, but it allows us to define only a single pattern in the
     1044             :       // .td files for each CND* instruction, rather than one pattern for
     1045             :       // integer True/False and another for FP True/False.
    1046          55 :       True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
    1047          55 :       False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    1048             :     }
    1049             : 
    1050             :     switch (CCOpcode) {
    1051             :     case ISD::SETONE:
    1052             :     case ISD::SETUNE:
    1053             :     case ISD::SETNE:
    1054        2149 :       CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
    1055             :       Temp = True;
    1056             :       True = False;
    1057             :       False = Temp;
    1058        2149 :       break;
    1059             :     default:
    1060             :       break;
    1061             :     }
    1062             :     SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
    1063             :         Cond, Zero,
    1064             :         True, False,
    1065        9759 :         DAG.getCondCode(CCOpcode));
    1066        9759 :     return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
    1067             :   }
    1068             : 
     1069             :   // If we make it this far, it means we have no native instructions to handle
    1070             :   // this SELECT_CC, so we must lower it.
    1071         878 :   SDValue HWTrue, HWFalse;
    1072             : 
    1073             :   if (CompareVT == MVT::f32) {
    1074          60 :     HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    1075          60 :     HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
    1076             :   } else if (CompareVT == MVT::i32) {
    1077         818 :     HWTrue = DAG.getConstant(-1, DL, CompareVT);
    1078         818 :     HWFalse = DAG.getConstant(0, DL, CompareVT);
    1079             :   }
    1080             :   else {
    1081           0 :     llvm_unreachable("Unhandled value type in LowerSELECT_CC");
    1082             :   }
    1083             : 
    1084             :   // Lower this unsupported SELECT_CC into a combination of two supported
    1085             :   // SELECT_CC operations.
    1086         878 :   SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
    1087             : 
    1088             :   return DAG.getNode(ISD::SELECT_CC, DL, VT,
    1089             :       Cond, HWFalse,
    1090             :       True, False,
    1091         878 :       DAG.getCondCode(ISD::SETNE));
    1092             : }
    1093             : 
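A scalar sketch of the fallback at the end of LowerSELECT_CC, which splits one unsupported SELECT_CC into two supported ones; an i32 compare type is assumed, and Cmp is a hypothetical stand-in for the condition code.

    #include <cstdint>

    // First step materializes the hardware boolean (-1 / 0) with a SET*-style
    // SELECT_CC; second step is a CND*-style SELECT_CC on "boolean != 0".
    static int32_t selectCCFallback(int32_t LHS, int32_t RHS, int32_t TrueVal,
                                    int32_t FalseVal,
                                    bool (*Cmp)(int32_t, int32_t)) {
      int32_t Cond = Cmp(LHS, RHS) ? -1 : 0;
      return Cond != 0 ? TrueVal : FalseVal;
    }
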
    1094             : /// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
    1095             : /// convert these pointers to a register index.  Each register holds
    1096             : /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
     1097             : /// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
     1098             : /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
    1099           0 : SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
    1100             :                                                unsigned StackWidth,
    1101             :                                                SelectionDAG &DAG) const {
    1102             :   unsigned SRLPad;
    1103           0 :   switch(StackWidth) {
    1104             :   case 1:
    1105             :     SRLPad = 2;
    1106             :     break;
    1107           0 :   case 2:
    1108             :     SRLPad = 3;
    1109           0 :     break;
    1110           0 :   case 4:
    1111             :     SRLPad = 4;
    1112           0 :     break;
    1113           0 :   default: llvm_unreachable("Invalid stack width");
    1114             :   }
    1115             : 
    1116             :   SDLoc DL(Ptr);
    1117             :   return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
    1118           0 :                      DAG.getConstant(SRLPad, DL, MVT::i32));
    1119             : }
    1120             : 
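A worked sketch of the conversion above, assuming a register row covers StackWidth * 4 bytes: width 1 shifts by 2, width 2 by 3, and width 4 by 4 (the full 16-byte register).

    #include <cassert>
    #include <cstdint>

    // Hypothetical helper mirroring stackPtrToRegIndex for plain values.
    static uint32_t regIndexFromBytePtr(uint32_t BytePtr, unsigned StackWidth) {
      assert(StackWidth == 1 || StackWidth == 2 || StackWidth == 4);
      unsigned SRLPad = (StackWidth == 1) ? 2 : (StackWidth == 2) ? 3 : 4;
      return BytePtr >> SRLPad;
    }
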
    1121           0 : void R600TargetLowering::getStackAddress(unsigned StackWidth,
    1122             :                                          unsigned ElemIdx,
    1123             :                                          unsigned &Channel,
    1124             :                                          unsigned &PtrIncr) const {
    1125           0 :   switch (StackWidth) {
    1126           0 :   default:
    1127             :   case 1:
    1128           0 :     Channel = 0;
    1129           0 :     if (ElemIdx > 0) {
    1130           0 :       PtrIncr = 1;
    1131             :     } else {
    1132           0 :       PtrIncr = 0;
    1133             :     }
    1134             :     break;
    1135           0 :   case 2:
    1136           0 :     Channel = ElemIdx % 2;
    1137           0 :     if (ElemIdx == 2) {
    1138           0 :       PtrIncr = 1;
    1139             :     } else {
    1140           0 :       PtrIncr = 0;
    1141             :     }
    1142             :     break;
    1143           0 :   case 4:
    1144           0 :     Channel = ElemIdx;
    1145           0 :     PtrIncr = 0;
    1146           0 :     break;
    1147             :   }
    1148           0 : }
    1149             : 
    1150        1313 : SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
    1151             :                                                    SelectionDAG &DAG) const {
    1152             :   SDLoc DL(Store);
     1153             :   // TODO: Who creates the i8 stores?
    1154             :   assert(Store->isTruncatingStore()
    1155             :          || Store->getValue().getValueType() == MVT::i8);
    1156             :   assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
    1157             : 
    1158        1313 :   SDValue Mask;
    1159             :   if (Store->getMemoryVT() == MVT::i8) {
    1160             :     assert(Store->getAlignment() >= 1);
    1161         591 :     Mask = DAG.getConstant(0xff, DL, MVT::i32);
    1162             :   } else if (Store->getMemoryVT() == MVT::i16) {
    1163             :     assert(Store->getAlignment() >= 2);
    1164         722 :     Mask = DAG.getConstant(0xffff, DL, MVT::i32);
    1165             :   } else {
    1166           0 :     llvm_unreachable("Unsupported private trunc store");
    1167             :   }
    1168             : 
    1169        1313 :   SDValue OldChain = Store->getChain();
    1170        1313 :   bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
    1171             :   // Skip dummy
    1172        2626 :   SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
    1173        1313 :   SDValue BasePtr = Store->getBasePtr();
    1174        1313 :   SDValue Offset = Store->getOffset();
    1175        1313 :   EVT MemVT = Store->getMemoryVT();
    1176             : 
    1177        1313 :   SDValue LoadPtr = BasePtr;
    1178        1313 :   if (!Offset.isUndef()) {
    1179           0 :     LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
    1180             :   }
    1181             : 
    1182             :   // Get dword location
    1183             :   // TODO: this should be eliminated by the future SHR ptr, 2
    1184             :   SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
    1185        2626 :                             DAG.getConstant(0xfffffffc, DL, MVT::i32));
    1186             : 
    1187             :   // Load dword
    1188             :   // TODO: can we be smarter about machine pointer info?
    1189        1313 :   MachinePointerInfo PtrInfo(UndefValue::get(
    1190        1313 :       Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
    1191        1313 :   SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
    1192             : 
    1193        1313 :   Chain = Dst.getValue(1);
    1194             : 
    1195             :   // Get offset in dword
    1196             :   SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
    1197        2626 :                                 DAG.getConstant(0x3, DL, MVT::i32));
    1198             : 
    1199             :   // Convert byte offset to bit shift
    1200             :   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
    1201        2626 :                                  DAG.getConstant(3, DL, MVT::i32));
    1202             : 
     1203             :   // TODO: Contrary to the name of the function,
    1204             :   // it also handles sub i32 non-truncating stores (like i1)
    1205             :   SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
    1206        1313 :                                   Store->getValue());
    1207             : 
    1208             :   // Mask the value to the right type
    1209        1313 :   SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
    1210             : 
    1211             :   // Shift the value in place
    1212             :   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
    1213        1313 :                                      MaskedValue, ShiftAmt);
    1214             : 
    1215             :   // Shift the mask in place
    1216        1313 :   SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
    1217             : 
    1218             :   // Invert the mask. NOTE: if we had native ROL instructions we could
    1219             :   // use inverted mask
    1220        1313 :   DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
    1221             : 
    1222             :   // Cleanup the target bits
    1223        1313 :   Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
    1224             : 
    1225             :   // Add the new bits
    1226        1313 :   SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
    1227             : 
    1228             :   // Store dword
    1229             :   // TODO: Can we be smarter about MachinePointerInfo?
    1230        1313 :   SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
    1231             : 
     1232             :   // If we are part of an expanded vector, make our neighbors depend on this store
    1233        1313 :   if (VectorTrunc) {
    1234             :     // Make all other vector elements depend on this store
    1235         868 :     Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    1236         868 :     DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
    1237             :   }
    1238        2626 :   return NewStore;
    1239             : }
    1240             : 
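The sequence above is a read-modify-write of the dword that contains the stored byte or halfword. A scalar sketch of the same steps, with Mem as a hypothetical dword-indexed stand-in for private memory:

    #include <cstdint>

    static void privateTruncStoreModel(uint32_t *Mem, uint32_t BytePtr,
                                       uint32_t Value,
                                       uint32_t Mask /* 0xff or 0xffff */) {
      uint32_t &Dst = Mem[(BytePtr & ~3u) >> 2]; // dword location (ptr & 0xfffffffc)
      uint32_t ShiftAmt = (BytePtr & 3u) * 8;    // byte offset -> bit shift
      Dst &= ~(Mask << ShiftAmt);                // clean up the target bits
      Dst |= (Value & Mask) << ShiftAmt;         // add the new bits
    }
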
    1241       33370 : SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    1242             :   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
    1243             :   unsigned AS = StoreNode->getAddressSpace();
    1244             : 
    1245       33370 :   SDValue Chain = StoreNode->getChain();
    1246       33370 :   SDValue Ptr = StoreNode->getBasePtr();
    1247       33370 :   SDValue Value = StoreNode->getValue();
    1248             : 
    1249       33370 :   EVT VT = Value.getValueType();
    1250       33370 :   EVT MemVT = StoreNode->getMemoryVT();
    1251       33370 :   EVT PtrVT = Ptr.getValueType();
    1252             : 
    1253             :   SDLoc DL(Op);
    1254             : 
    1255             :   // Neither LOCAL nor PRIVATE can do vectors at the moment
    1256       57481 :   if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
    1257             :       VT.isVector()) {
    1258        1243 :     if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
    1259             :          StoreNode->isTruncatingStore()) {
    1260             :       // Add an extra level of chain to isolate this vector
    1261         243 :       SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
    1262             :       // TODO: can the chain be replaced without creating a new store?
    1263             :       SDValue NewStore = DAG.getTruncStore(
    1264         243 :           NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
    1265             :           MemVT, StoreNode->getAlignment(),
    1266         729 :           StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
    1267             :       StoreNode = cast<StoreSDNode>(NewStore);
    1268             :     }
    1269             : 
    1270         939 :     return scalarizeVectorStore(StoreNode, DAG);
    1271             :   }
    1272             : 
    1273       32431 :   unsigned Align = StoreNode->getAlignment();
    1274       32666 :   if (Align < MemVT.getStoreSize() &&
    1275         235 :       !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    1276          24 :     return expandUnalignedStore(StoreNode, DAG);
    1277             :   }
    1278             : 
    1279             :   SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
    1280       32407 :                                   DAG.getConstant(2, DL, PtrVT));
    1281             : 
    1282       32407 :   if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
    1283             :     // It is beneficial to create MSKOR here instead of combiner to avoid
    1284             :     // artificial dependencies introduced by RMW
    1285        9253 :     if (StoreNode->isTruncatingStore()) {
    1286             :       assert(VT.bitsLE(MVT::i32));
    1287         214 :       SDValue MaskConstant;
    1288             :       if (MemVT == MVT::i8) {
    1289         115 :         MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
    1290             :       } else {
    1291             :         assert(MemVT == MVT::i16);
    1292             :         assert(StoreNode->getAlignment() >= 2);
    1293          99 :         MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
    1294             :       }
    1295             : 
    1296             :       SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
    1297         214 :                                       DAG.getConstant(0x00000003, DL, PtrVT));
    1298             :       SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
    1299         214 :                                      DAG.getConstant(3, DL, VT));
    1300             : 
    1301             :       // Put the mask in correct place
    1302         214 :       SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
    1303             : 
    1304             :       // Put the value bits in correct place
    1305         214 :       SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
    1306         214 :       SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
    1307             : 
    1308             :       // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
    1309             :       // vector instead.
    1310             :       SDValue Src[4] = {
    1311             :         ShiftedValue,
    1312         214 :         DAG.getConstant(0, DL, MVT::i32),
    1313         214 :         DAG.getConstant(0, DL, MVT::i32),
    1314             :         Mask
    1315         856 :       };
    1316         214 :       SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
    1317         214 :       SDValue Args[3] = { Chain, Input, DWordAddr };
    1318             :       return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
    1319             :                                      Op->getVTList(), Args, MemVT,
    1320         642 :                                      StoreNode->getMemOperand());
    1321       11960 :     } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
    1322             :       // Convert pointer from byte address to dword address.
    1323        2886 :       Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    1324             : 
    1325        5772 :       if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
    1326           0 :         llvm_unreachable("Truncated and indexed stores not supported yet");
    1327             :       } else {
    1328        2886 :         Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    1329             :       }
    1330        2886 :       return Chain;
    1331             :     }
    1332             :   }
    1333             : 
    1334             :   // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
    1335       29307 :   if (AS != AMDGPUASI.PRIVATE_ADDRESS)
    1336       18640 :     return SDValue();
    1337             : 
    1338       10667 :   if (MemVT.bitsLT(MVT::i32))
    1339        1313 :     return lowerPrivateTruncStore(StoreNode, DAG);
    1340             : 
    1341             :   // Standard i32+ store, tag it with DWORDADDR to note that the address
    1342             :   // has been shifted
    1343        9354 :   if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    1344        2776 :     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    1345        2776 :     return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    1346             :   }
    1347             : 
    1348             :   // Tagged i32+ stores will be matched by patterns
    1349        6578 :   return SDValue();
    1350             : }
    1351             : 
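For the global truncating-store path in LowerSTORE, the DAG hands STORE_MSKOR a shifted value and a shifted mask and leaves the combine to the hardware. A plausible scalar model of that read-modify-write, stated here only as an assumption about the operation's effect:

    #include <cstdint>

    static uint32_t mskorModel(uint32_t Old, uint32_t ShiftedValue,
                               uint32_t ShiftedMask) {
      return (Old & ~ShiftedMask) | ShiftedValue;
    }
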
     1352             : // Returns 512 + (kc_bank << 12).
    1353             : static int
    1354             : ConstantAddressBlock(unsigned AddressSpace) {
    1355             :   switch (AddressSpace) {
    1356             :   case AMDGPUAS::CONSTANT_BUFFER_0:
    1357             :     return 512;
    1358             :   case AMDGPUAS::CONSTANT_BUFFER_1:
    1359             :     return 512 + 4096;
    1360             :   case AMDGPUAS::CONSTANT_BUFFER_2:
    1361             :     return 512 + 4096 * 2;
    1362             :   case AMDGPUAS::CONSTANT_BUFFER_3:
    1363             :     return 512 + 4096 * 3;
    1364             :   case AMDGPUAS::CONSTANT_BUFFER_4:
    1365             :     return 512 + 4096 * 4;
    1366             :   case AMDGPUAS::CONSTANT_BUFFER_5:
    1367             :     return 512 + 4096 * 5;
    1368             :   case AMDGPUAS::CONSTANT_BUFFER_6:
    1369             :     return 512 + 4096 * 6;
    1370             :   case AMDGPUAS::CONSTANT_BUFFER_7:
    1371             :     return 512 + 4096 * 7;
    1372             :   case AMDGPUAS::CONSTANT_BUFFER_8:
    1373             :     return 512 + 4096 * 8;
    1374             :   case AMDGPUAS::CONSTANT_BUFFER_9:
    1375             :     return 512 + 4096 * 9;
    1376             :   case AMDGPUAS::CONSTANT_BUFFER_10:
    1377             :     return 512 + 4096 * 10;
    1378             :   case AMDGPUAS::CONSTANT_BUFFER_11:
    1379             :     return 512 + 4096 * 11;
    1380             :   case AMDGPUAS::CONSTANT_BUFFER_12:
    1381             :     return 512 + 4096 * 12;
    1382             :   case AMDGPUAS::CONSTANT_BUFFER_13:
    1383             :     return 512 + 4096 * 13;
    1384             :   case AMDGPUAS::CONSTANT_BUFFER_14:
    1385             :     return 512 + 4096 * 14;
    1386             :   case AMDGPUAS::CONSTANT_BUFFER_15:
    1387             :     return 512 + 4096 * 15;
    1388             :   default:
    1389             :     return -1;
    1390             :   }
    1391             : }
    1392             : 
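The switch above reduces to the closed form in its comment; an illustrative helper (hypothetical, not used by this file) for constant buffers 0 through 15:

    // Base of constant buffer kc_bank: 512 + (kc_bank << 12).
    static int constantBlockBase(unsigned KCBank) {
      return 512 + 4096 * static_cast<int>(KCBank); // e.g. bank 3 -> 12800
    }
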
    1393        4054 : SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
    1394             :                                                 SelectionDAG &DAG) const {
    1395             :   SDLoc DL(Op);
    1396             :   LoadSDNode *Load = cast<LoadSDNode>(Op);
    1397             :   ISD::LoadExtType ExtType = Load->getExtensionType();
    1398        4054 :   EVT MemVT = Load->getMemoryVT();
    1399             :   assert(Load->getAlignment() >= MemVT.getStoreSize());
    1400             : 
    1401        4054 :   SDValue BasePtr = Load->getBasePtr();
    1402        4054 :   SDValue Chain = Load->getChain();
    1403        4054 :   SDValue Offset = Load->getOffset();
    1404             : 
    1405        4054 :   SDValue LoadPtr = BasePtr;
    1406        4054 :   if (!Offset.isUndef()) {
    1407           0 :     LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
    1408             :   }
    1409             : 
    1410             :   // Get dword location
    1411             :   // NOTE: this should be eliminated by the future SHR ptr, 2
    1412             :   SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
    1413        8108 :                             DAG.getConstant(0xfffffffc, DL, MVT::i32));
    1414             : 
    1415             :   // Load dword
    1416             :   // TODO: can we be smarter about machine pointer info?
    1417        4054 :   MachinePointerInfo PtrInfo(UndefValue::get(
    1418        4054 :       Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
    1419        4054 :   SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
    1420             : 
    1421             :   // Get offset within the register.
    1422             :   SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
    1423        8108 :                                 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
    1424             : 
    1425             :   // Bit offset of target byte (byteIdx * 8).
    1426             :   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
    1427        8108 :                                  DAG.getConstant(3, DL, MVT::i32));
    1428             : 
    1429             :   // Shift to the right.
    1430        4054 :   SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
    1431             : 
    1432             :   // Eliminate the upper bits by setting them to ...
    1433        4054 :   EVT MemEltVT = MemVT.getScalarType();
    1434             : 
    1435        4054 :   if (ExtType == ISD::SEXTLOAD) { // ... ones.
    1436        1280 :     SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    1437        1280 :     Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
    1438             :   } else { // ... or zeros.
    1439        2774 :     Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
    1440             :   }
    1441             : 
    1442             :   SDValue Ops[] = {
    1443             :     Ret,
    1444             :     Read.getValue(1) // This should be our output chain
    1445        4054 :   };
    1446             : 
    1447        8108 :   return DAG.getMergeValues(Ops, DL);
    1448             : }
    1449             : 
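A scalar sketch of the extending load above for an i8 element, with Mem again a hypothetical dword-indexed stand-in for private memory:

    #include <cstdint>

    static int32_t privateExtLoadByteModel(const uint32_t *Mem, uint32_t BytePtr,
                                           bool IsSExt) {
      uint32_t Dword = Mem[(BytePtr & ~3u) >> 2];              // containing dword
      uint32_t Byte = (Dword >> ((BytePtr & 3u) * 8)) & 0xff;  // target byte
      return IsSExt ? int32_t(int8_t(Byte))  // SEXTLOAD: fill upper bits with ones
                    : int32_t(Byte);         // otherwise: fill with zeros
    }
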
    1450       39152 : SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    1451             :   LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
    1452             :   unsigned AS = LoadNode->getAddressSpace();
    1453       39152 :   EVT MemVT = LoadNode->getMemoryVT();
    1454             :   ISD::LoadExtType ExtType = LoadNode->getExtensionType();
    1455             : 
    1456       64774 :   if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
    1457       78304 :       ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    1458        4054 :     return lowerPrivateExtLoad(Op, DAG);
    1459             :   }
    1460             : 
    1461             :   SDLoc DL(Op);
    1462       35098 :   EVT VT = Op.getValueType();
    1463       35098 :   SDValue Chain = LoadNode->getChain();
    1464       35098 :   SDValue Ptr = LoadNode->getBasePtr();
    1465             : 
    1466       30615 :   if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
    1467       91764 :       LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
    1468             :       VT.isVector()) {
    1469         374 :       return scalarizeVectorLoad(LoadNode, DAG);
    1470             :   }
    1471             : 
    1472             :   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
    1473       12228 :   if (ConstantBlock > -1 &&
    1474         962 :       ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
    1475             :        (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    1476             :     SDValue Result;
    1477       10066 :     if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
    1478        5159 :         isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
    1479             :         isa<ConstantSDNode>(Ptr)) {
    1480        5159 :       SDValue Slots[4];
    1481       46431 :       for (unsigned i = 0; i < 4; i++) {
    1482             :         // We want Const position encoded with the following formula :
    1483             :         // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    1484             :         // const_index is Ptr computed by llvm using an alignment of 16.
    1485             :         // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    1486             :         // then div by 4 at the ISel step
    1487             :         SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
    1488       41272 :             DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    1489       20636 :         Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
    1490             :       }
    1491        5159 :       EVT NewVT = MVT::v4i32;
    1492             :       unsigned NumElements = 4;
    1493        5159 :       if (VT.isVector()) {
    1494         541 :         NewVT = VT;
    1495         541 :         NumElements = VT.getVectorNumElements();
    1496             :       }
    1497        5159 :       Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    1498             :     } else {
     1499             :       // A non-constant ptr can't be folded; keep it as a v4i32 CONST_ADDRESS load.
    1500           0 :       Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
    1501             :           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
    1502             :                       DAG.getConstant(4, DL, MVT::i32)),
    1503           0 :                       DAG.getConstant(LoadNode->getAddressSpace() -
    1504             :                                       AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
    1505           0 :           );
    1506             :     }
    1507             : 
    1508        5159 :     if (!VT.isVector()) {
    1509        4618 :       Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
    1510        9236 :                            DAG.getConstant(0, DL, MVT::i32));
    1511             :     }
    1512             : 
    1513             :     SDValue MergedValues[2] = {
    1514             :       Result,
    1515             :       Chain
    1516        5159 :     };
    1517        5159 :     return DAG.getMergeValues(MergedValues, DL);
    1518             :   }
    1519             : 
    1520             :   // For most operations returning SDValue() will result in the node being
    1521             :   // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
    1522             :   // need to manually expand loads that may be legal in some address spaces and
    1523             :   // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
    1524             :   // compute shaders, since the data is sign extended when it is uploaded to the
    1525             :   // buffer. However SEXT loads from other address spaces are not supported, so
    1526             :   // we need to expand them here.
    1527       29565 :   if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    1528         290 :     EVT MemVT = LoadNode->getMemoryVT();
    1529             :     assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    1530             :     SDValue NewLoad = DAG.getExtLoad(
    1531         290 :         ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
    1532         580 :         LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    1533             :     SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
    1534         290 :                               DAG.getValueType(MemVT));
    1535             : 
    1536         290 :     SDValue MergedValues[2] = { Res, Chain };
    1537         290 :     return DAG.getMergeValues(MergedValues, DL);
    1538             :   }
    1539             : 
    1540       29275 :   if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
    1541        7892 :     return SDValue();
    1542             :   }
    1543             : 
    1544             :   // DWORDADDR ISD marks already shifted address
    1545       21383 :   if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    1546             :     assert(VT == MVT::i32);
    1547       11794 :     Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    1548        5897 :     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    1549       11794 :     return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
    1550             :   }
    1551       15486 :   return SDValue();
    1552             : }
    1553             : 
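A worked sketch of the constant position encoding used in the CONST_ADDRESS path of LowerLOAD above, assuming a 16-byte aligned byte pointer (so const_index == BytePtr / 16):

    #include <cstdint>

    // The DAG adds 4*chan + 16*(512 + (kc_bank << 12)) to the byte pointer and
    // ISel later divides by 4, which equals
    // ((512 + (kc_bank << 12) + const_index) << 2) + chan.
    static uint32_t constPositionModel(uint32_t BytePtr, unsigned KCBank,
                                       unsigned Chan) {
      uint32_t Block = 512 + (KCBank << 12);
      uint32_t DAGAddr = BytePtr + 4 * Chan + Block * 16; // formed in LowerLOAD
      return DAGAddr / 4;                                 // done at the ISel step
    }
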
    1554          86 : SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
    1555          86 :   SDValue Chain = Op.getOperand(0);
    1556          86 :   SDValue Cond  = Op.getOperand(1);
    1557          86 :   SDValue Jump  = Op.getOperand(2);
    1558             : 
    1559          86 :   return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
    1560         172 :                      Chain, Jump, Cond);
    1561             : }
    1562             : 
    1563        1606 : SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
    1564             :                                             SelectionDAG &DAG) const {
    1565        1606 :   MachineFunction &MF = DAG.getMachineFunction();
    1566        1606 :   const R600FrameLowering *TFL = Subtarget->getFrameLowering();
    1567             : 
    1568             :   FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
    1569             : 
    1570        1606 :   unsigned FrameIndex = FIN->getIndex();
    1571             :   unsigned IgnoredFrameReg;
    1572             :   unsigned Offset =
    1573        1606 :     TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
    1574        3212 :   return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
    1575        3212 :                          Op.getValueType());
    1576             : }
    1577             : 
    1578          50 : CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
    1579             :                                                   bool IsVarArg) const {
    1580          50 :   switch (CC) {
    1581             :   case CallingConv::AMDGPU_KERNEL:
    1582             :   case CallingConv::SPIR_KERNEL:
    1583             :   case CallingConv::C:
    1584             :   case CallingConv::Fast:
    1585             :   case CallingConv::Cold:
    1586             :     return CC_R600_Kernel;
    1587          50 :   case CallingConv::AMDGPU_VS:
    1588             :   case CallingConv::AMDGPU_GS:
    1589             :   case CallingConv::AMDGPU_PS:
    1590             :   case CallingConv::AMDGPU_CS:
    1591             :   case CallingConv::AMDGPU_HS:
    1592             :   case CallingConv::AMDGPU_ES:
    1593             :   case CallingConv::AMDGPU_LS:
    1594          50 :     return CC_R600;
    1595           0 :   default:
    1596           0 :     report_fatal_error("Unsupported calling convention.");
    1597             :   }
    1598             : }
    1599             : 
    1600             : /// XXX Only kernel functions are supported, so we can assume for now that
    1601             : /// every function is a kernel function, but in the future we should use
    1602             : /// separate calling conventions for kernel and non-kernel functions.
    1603        2239 : SDValue R600TargetLowering::LowerFormalArguments(
    1604             :     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    1605             :     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    1606             :     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
    1607             :   SmallVector<CCValAssign, 16> ArgLocs;
    1608             :   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
    1609        4478 :                  *DAG.getContext());
    1610        2239 :   MachineFunction &MF = DAG.getMachineFunction();
    1611             :   SmallVector<ISD::InputArg, 8> LocalIns;
    1612             : 
    1613        2239 :   if (AMDGPU::isShader(CallConv)) {
    1614          50 :     CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
    1615             :   } else {
    1616        2189 :     analyzeFormalArgumentsCompute(CCInfo, Ins);
    1617             :   }
    1618             : 
    1619        8337 :   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    1620        6098 :     CCValAssign &VA = ArgLocs[i];
    1621             :     const ISD::InputArg &In = Ins[i];
    1622             :     EVT VT = In.VT;
    1623             :     EVT MemVT = VA.getLocVT();
    1624       11832 :     if (!VT.isVector() && MemVT.isVector()) {
    1625             :       // Get load source type if scalarized.
    1626           0 :       MemVT = MemVT.getVectorElementType();
    1627             :     }
    1628             : 
    1629        6098 :     if (AMDGPU::isShader(CallConv)) {
    1630          65 :       unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
    1631          65 :       SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
    1632          65 :       InVals.push_back(Register);
    1633          65 :       continue;
    1634             :     }
    1635             : 
    1636        6033 :     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
    1637        6033 :                                           AMDGPUASI.CONSTANT_BUFFER_0);
    1638             : 
    1639             :     // i64 isn't a legal type, so the register type used ends up as i32, which
    1640             :     // isn't expected here. It attempts to create this sextload, but it ends up
    1641             :     // being invalid. Somehow this seems to work with i64 arguments, but breaks
    1642             :     // for <1 x i64>.
    1643             : 
     1644             :     // The first 36 bytes of the input buffer contain information about
    1645             :     // thread group and global sizes.
    1646             :     ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    1647        6033 :     if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
    1648             :       // FIXME: This should really check the extload type, but the handling of
    1649             :       // extload vector parameters seems to be broken.
    1650             : 
    1651             :       // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    1652             :       Ext = ISD::SEXTLOAD;
    1653             :     }
    1654             : 
    1655             :     // Compute the offset from the value.
    1656             :     // XXX - I think PartOffset should give you this, but it seems to give the
    1657             :     // size of the register which isn't useful.
    1658             : 
    1659       12066 :     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    1660        6033 :     unsigned PartOffset = VA.getLocMemOffset();
    1661        6033 :     unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
    1662        6033 :                       VA.getLocMemOffset();
    1663             : 
    1664        6033 :     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    1665             :     SDValue Arg = DAG.getLoad(
    1666             :         ISD::UNINDEXED, Ext, VT, DL, Chain,
    1667             :         DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
    1668             :         MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
    1669             :                                         MachineMemOperand::MODereferenceable |
    1670       12066 :                                         MachineMemOperand::MOInvariant);
    1671             : 
    1672             :     // 4 is the preferred alignment for the CONSTANT memory space.
    1673        6033 :     InVals.push_back(Arg);
    1674             :   }
    1675        4478 :   return Chain;
    1676             : }
    1677             : 
    1678       35090 : EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
    1679             :                                            EVT VT) const {
    1680       35090 :    if (!VT.isVector())
    1681       35033 :      return MVT::i32;
    1682          57 :    return VT.changeVectorElementTypeToInteger();
    1683             : }
    1684             : 
    1685         101 : bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
    1686             :                                           const SelectionDAG &DAG) const {
    1687             :   // Local and Private addresses do not handle vectors. Limit to i32
    1688         101 :   if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
    1689         100 :     return (MemVT.getSizeInBits() <= 32);
    1690             :   }
    1691             :   return true;
    1692             : }
    1693             : 
    1694         844 : bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
    1695             :                                                         unsigned AddrSpace,
    1696             :                                                         unsigned Align,
    1697             :                                                         bool *IsFast) const {
    1698         844 :   if (IsFast)
    1699         609 :     *IsFast = false;
    1700             : 
    1701         844 :   if (!VT.isSimple() || VT == MVT::Other)
    1702             :     return false;
    1703             : 
    1704         843 :   if (VT.bitsLT(MVT::i32))
    1705             :     return false;
    1706             : 
    1707             :   // TODO: This is a rough estimate.
    1708         808 :   if (IsFast)
    1709         597 :     *IsFast = true;
    1710             : 
    1711         808 :   return VT.bitsGT(MVT::i32) && Align % 4 == 0;
    1712             : }
    1713             : 
    1714         392 : static SDValue CompactSwizzlableVector(
    1715             :   SelectionDAG &DAG, SDValue VectorEntry,
    1716             :   DenseMap<unsigned, unsigned> &RemapSwizzle) {
    1717             :   assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
    1718             :   assert(RemapSwizzle.empty());
    1719             :   SDValue NewBldVec[4] = {
    1720             :     VectorEntry.getOperand(0),
    1721             :     VectorEntry.getOperand(1),
    1722             :     VectorEntry.getOperand(2),
    1723             :     VectorEntry.getOperand(3)
    1724         392 :   };
    1725             : 
    1726        1960 :   for (unsigned i = 0; i < 4; i++) {
    1727        3136 :     if (NewBldVec[i].isUndef())
     1728             :       // We mask the write here to teach later passes that the ith element of this
     1729             :       // vector is undef. Thus we can use it to reduce 128-bit register usage,
     1730             :       // break false dependencies and additionally make assembly easier to read.
    1731         212 :       RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    1732        1568 :     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
    1733          56 :       if (C->isZero()) {
    1734          27 :         RemapSwizzle[i] = 4; // SEL_0
    1735          27 :         NewBldVec[i] = DAG.getUNDEF(MVT::f32);
    1736           1 :       } else if (C->isExactlyValue(1.0)) {
    1737           1 :         RemapSwizzle[i] = 5; // SEL_1
    1738           1 :         NewBldVec[i] = DAG.getUNDEF(MVT::f32);
    1739             :       }
    1740             :     }
    1741             : 
    1742        3136 :     if (NewBldVec[i].isUndef())
    1743             :       continue;
    1744        4974 :     for (unsigned j = 0; j < i; j++) {
    1745             :       if (NewBldVec[i] == NewBldVec[j]) {
    1746          32 :         NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
    1747          16 :         RemapSwizzle[i] = j;
    1748             :         break;
    1749             :       }
    1750             :     }
    1751             :   }
    1752             : 
    1753         392 :   return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
    1754         784 :                             NewBldVec);
    1755             : }
    1756             : 
    1757         392 : static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
    1758             :                                 DenseMap<unsigned, unsigned> &RemapSwizzle) {
    1759             :   assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
    1760             :   assert(RemapSwizzle.empty());
    1761             :   SDValue NewBldVec[4] = {
    1762             :       VectorEntry.getOperand(0),
    1763             :       VectorEntry.getOperand(1),
    1764             :       VectorEntry.getOperand(2),
    1765             :       VectorEntry.getOperand(3)
    1766         392 :   };
    1767         392 :   bool isUnmovable[4] = { false, false, false, false };
    1768        1960 :   for (unsigned i = 0; i < 4; i++) {
    1769        1568 :     RemapSwizzle[i] = i;
    1770        3136 :     if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    1771             :       unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
    1772          94 :           ->getZExtValue();
    1773          47 :       if (i == Idx)
    1774          39 :         isUnmovable[Idx] = true;
    1775             :     }
    1776             :   }
    1777             : 
    1778        1949 :   for (unsigned i = 0; i < 4; i++) {
    1779        3126 :     if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    1780             :       unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
    1781          92 :           ->getZExtValue();
    1782          46 :       if (isUnmovable[Idx])
    1783          40 :         continue;
    1784             :       // Swap i and Idx
    1785           6 :       std::swap(NewBldVec[Idx], NewBldVec[i]);
    1786             :       std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
    1787           6 :       break;
    1788             :     }
    1789             :   }
    1790             : 
    1791         392 :   return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
    1792         784 :                             NewBldVec);
    1793             : }
    1794             : 
    1795         392 : SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
    1796             :                                             SelectionDAG &DAG,
    1797             :                                             const SDLoc &DL) const {
    1798             :   assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
    1799             :   // Old -> New swizzle values
    1800             :   DenseMap<unsigned, unsigned> SwizzleRemap;
    1801             : 
    1802         392 :   BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
    1803        3528 :   for (unsigned i = 0; i < 4; i++) {
    1804        4704 :     unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    1805        1568 :     if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
    1806         119 :       Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
    1807             :   }
    1808             : 
    1809         392 :   SwizzleRemap.clear();
    1810         392 :   BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
    1811        3528 :   for (unsigned i = 0; i < 4; i++) {
    1812        4704 :     unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    1813        1568 :     if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
    1814        1350 :       Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
    1815             :   }
    1816             : 
    1817         784 :   return BuildVector;
    1818             : }
    1819             : 
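For reference, the swizzle select values written into RemapSwizzle by the two helpers used above, gathered into one sketch; the enumerator names are illustrative, only the numeric values appear in this file.

    // 0-3 select a source channel; 4, 5 and 7 are the special selects used by
    // CompactSwizzlableVector for constant and undef elements.
    enum SwizzleSelModel : unsigned {
      SEL_X = 0, SEL_Y = 1, SEL_Z = 2, SEL_W = 3,
      SEL_0 = 4,          // element folded to the constant 0.0
      SEL_1 = 5,          // element folded to the constant 1.0
      SEL_MASK_WRITE = 7  // element is undef, write is masked
    };
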
    1820             : //===----------------------------------------------------------------------===//
    1821             : // Custom DAG Optimizations
    1822             : //===----------------------------------------------------------------------===//
    1823             : 
    1824      201224 : SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
    1825             :                                               DAGCombinerInfo &DCI) const {
    1826      201224 :   SelectionDAG &DAG = DCI.DAG;
    1827             :   SDLoc DL(N);
    1828             : 
    1829      402448 :   switch (N->getOpcode()) {
    1830             :   // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
    1831           2 :   case ISD::FP_ROUND: {
    1832           2 :       SDValue Arg = N->getOperand(0);
    1833           2 :       if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
    1834             :         return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
    1835           2 :                            Arg.getOperand(0));
    1836             :       }
    1837             :       break;
    1838             :     }
    1839             : 
    1840             :   // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
    1841             :   // (i32 select_cc f32, f32, -1, 0 cc)
    1842             :   //
    1843             :   // Mesa's GLSL frontend generates the above pattern a lot and we can lower
    1844             :   // this to one of the SET*_DX10 instructions.
    1845          58 :   case ISD::FP_TO_SINT: {
    1846          58 :     SDValue FNeg = N->getOperand(0);
    1847          58 :     if (FNeg.getOpcode() != ISD::FNEG) {
    1848          42 :       return SDValue();
    1849             :     }
    1850          16 :     SDValue SelectCC = FNeg.getOperand(0);
    1851             :     if (SelectCC.getOpcode() != ISD::SELECT_CC ||
    1852           8 :         SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
    1853          16 :         SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
    1854          32 :         !isHWTrueValue(SelectCC.getOperand(2)) ||
    1855           8 :         !isHWFalseValue(SelectCC.getOperand(3))) {
    1856           8 :       return SDValue();
    1857             :     }
    1858             : 
    1859             :     return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
    1860             :                            SelectCC.getOperand(0), // LHS
    1861             :                            SelectCC.getOperand(1), // RHS
    1862             :                            DAG.getConstant(-1, DL, MVT::i32), // True
    1863             :                            DAG.getConstant(0, DL, MVT::i32),  // False
    1864          32 :                            SelectCC.getOperand(4)); // CC
    1865             : 
    1866             :     break;
    1867             :   }
    1868             : 
    1869             :   // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
    1870             :   // => build_vector elt0, ... , NewEltIdx, ... , eltN
    1871         276 :   case ISD::INSERT_VECTOR_ELT: {
    1872         276 :     SDValue InVec = N->getOperand(0);
    1873         276 :     SDValue InVal = N->getOperand(1);
    1874         276 :     SDValue EltNo = N->getOperand(2);
    1875             : 
    1876             :     // If the inserted element is an UNDEF, just use the input vector.
    1877         276 :     if (InVal.isUndef())
    1878           0 :       return InVec;
    1879             : 
    1880         276 :     EVT VT = InVec.getValueType();
    1881             : 
    1882             :     // If we can't generate a legal BUILD_VECTOR, exit
    1883             :     if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
    1884           8 :       return SDValue();
    1885             : 
    1886             :     // Check that we know which element is being inserted
    1887             :     if (!isa<ConstantSDNode>(EltNo))
    1888           2 :       return SDValue();
    1889         266 :     unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    1890             : 
    1891             :     // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    1892             :     // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
    1893             :     // vector elements.
    1894             :     SmallVector<SDValue, 8> Ops;
    1895         266 :     if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
    1896           8 :       Ops.append(InVec.getNode()->op_begin(),
    1897             :                  InVec.getNode()->op_end());
    1898         262 :     } else if (InVec.isUndef()) {
    1899           0 :       unsigned NElts = VT.getVectorNumElements();
    1900           0 :       Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    1901             :     } else {
    1902         262 :       return SDValue();
    1903             :     }
    1904             : 
    1905             :     // Insert the element
    1906           4 :     if (Elt < Ops.size()) {
    1907             :       // All the operands of BUILD_VECTOR must have the same type;
    1908             :       // we enforce that here.
    1909           8 :       EVT OpVT = Ops[0].getValueType();
    1910           0 :       if (InVal.getValueType() != OpVT)
    1911           0 :         InVal = OpVT.bitsGT(InVal.getValueType()) ?
    1912           0 :           DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
    1913           0 :           DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
    1914           4 :       Ops[Elt] = InVal;
    1915             :     }
    1916             : 
    1917             :     // Return the new vector
    1918           4 :     return DAG.getBuildVector(VT, DL, Ops);
    1919             :   }
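                     :   // Editor's illustrative sketch (not part of the original source): with a
                     :   // constant index of 2,
                     :   //   insert_vector_elt (build_vector a, b, c, d), x, 2
                     :   // becomes
                     :   //   build_vector a, b, x, d
                     :   // If the input vector is UNDEF rather than a BUILD_VECTOR, Ops is first
                     :   // filled with UNDEF elements and the addressed lane is then overwritten,
                     :   // which yields the same BUILD_VECTOR form.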
    1920             : 
    1921             :   // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering
    1922             :   // also needs a custom combine here.
    1923       11665 :   case ISD::EXTRACT_VECTOR_ELT: {
    1924       11665 :     SDValue Arg = N->getOperand(0);
    1925       11665 :     if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
    1926             :       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    1927           0 :         unsigned Element = Const->getZExtValue();
    1928           0 :         return Arg->getOperand(Element);
    1929             :       }
    1930             :     }
    1931         214 :     if (Arg.getOpcode() == ISD::BITCAST &&
    1932       11909 :         Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
    1933       11725 :         (Arg.getOperand(0).getValueType().getVectorNumElements() ==
    1934       11695 :          Arg.getValueType().getVectorNumElements())) {
    1935             :       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    1936          29 :         unsigned Element = Const->getZExtValue();
    1937             :         return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
    1938          58 :                            Arg->getOperand(0).getOperand(Element));
    1939             :       }
    1940             :     }
    1941             :     break;
    1942             :   }
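                     :   // Editor's illustrative sketch (not part of the original source): with a
                     :   // constant index of 1,
                     :   //   extract_vector_elt (build_vector a, b, c, d), 1
                     :   // folds to b, and
                     :   //   extract_vector_elt (bitcast (build_vector a, b, c, d)), 1
                     :   // folds to (bitcast b), provided the bitcast preserves the element count
                     :   // so the per-lane correspondence still holds.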
    1943             : 
    1944       11343 :   case ISD::SELECT_CC: {
    1945             :     // Try common optimizations
    1946       11343 :     if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
    1947           0 :       return Ret;
    1948             : 
    1949             :     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    1950             :     //      selectcc x, y, a, b, inv(cc)
    1951             :     //
    1952             :     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    1953             :     //      selectcc x, y, a, b, cc
    1954       11343 :     SDValue LHS = N->getOperand(0);
    1955       11343 :     if (LHS.getOpcode() != ISD::SELECT_CC) {
    1956        5598 :       return SDValue();
    1957             :     }
    1958             : 
    1959        5745 :     SDValue RHS = N->getOperand(1);
    1960        5745 :     SDValue True = N->getOperand(2);
    1961        5745 :     SDValue False = N->getOperand(3);
    1962        5745 :     ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
    1963             : 
    1964        5771 :     if (LHS.getOperand(2).getNode() != True.getNode() ||
    1965        5757 :         LHS.getOperand(3).getNode() != False.getNode() ||
    1966             :         RHS.getNode() != False.getNode()) {
    1967        5741 :       return SDValue();
    1968             :     }
    1969             : 
    1970           4 :     switch (NCC) {
    1971           0 :     default: return SDValue();
    1972           1 :     case ISD::SETNE: return LHS;
    1973           3 :     case ISD::SETEQ: {
    1974           3 :       ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
    1975           3 :       LHSCC = ISD::getSetCCInverse(LHSCC,
    1976           9 :                                   LHS.getOperand(0).getValueType().isInteger());
    1977           9 :       if (DCI.isBeforeLegalizeOps() ||
    1978             :           isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
    1979             :         return DAG.getSelectCC(DL,
    1980             :                                LHS.getOperand(0),
    1981             :                                LHS.getOperand(1),
    1982             :                                LHS.getOperand(2),
    1983             :                                LHS.getOperand(3),
    1984           0 :                                LHSCC);
    1985             :       break;
    1986             :     }
    1987             :     }
    1988           3 :     return SDValue();
    1989             :   }
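                     :   // Editor's illustrative sketch (not part of the original source): for
                     :   // integer operands,
                     :   //   selectcc (selectcc x, y, a, b, setlt), b, a, b, seteq
                     :   // selects a exactly when the inner select produced b, i.e. when
                     :   // (x setlt y) was false, so it folds to
                     :   //   selectcc x, y, a, b, setge
                     :   // With setne as the outer condition the outer select just reproduces the
                     :   // inner one, so the inner SELECT_CC node is returned unchanged.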
    1990             : 
    1991         138 :   case AMDGPUISD::R600_EXPORT: {
    1992         138 :     SDValue Arg = N->getOperand(1);
    1993         138 :     if (Arg.getOpcode() != ISD::BUILD_VECTOR)
    1994             :       break;
    1995             : 
    1996             :     SDValue NewArgs[8] = {
    1997             :       N->getOperand(0), // Chain
    1998             :       SDValue(),
    1999             :       N->getOperand(2), // ArrayBase
    2000             :       N->getOperand(3), // Type
    2001             :       N->getOperand(4), // SWZ_X
    2002             :       N->getOperand(5), // SWZ_Y
    2003             :       N->getOperand(6), // SWZ_Z
    2004             :       N->getOperand(7) // SWZ_W
    2005         120 :     };
    2006         120 :     NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    2007         240 :     return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
    2008             :   }
    2009         296 :   case AMDGPUISD::TEXTURE_FETCH: {
    2010         296 :     SDValue Arg = N->getOperand(1);
    2011         296 :     if (Arg.getOpcode() != ISD::BUILD_VECTOR)
    2012             :       break;
    2013             : 
    2014             :     SDValue NewArgs[19] = {
    2015             :       N->getOperand(0),
    2016             :       N->getOperand(1),
    2017             :       N->getOperand(2),
    2018             :       N->getOperand(3),
    2019             :       N->getOperand(4),
    2020             :       N->getOperand(5),
    2021             :       N->getOperand(6),
    2022             :       N->getOperand(7),
    2023             :       N->getOperand(8),
    2024             :       N->getOperand(9),
    2025             :       N->getOperand(10),
    2026             :       N->getOperand(11),
    2027             :       N->getOperand(12),
    2028             :       N->getOperand(13),
    2029             :       N->getOperand(14),
    2030             :       N->getOperand(15),
    2031             :       N->getOperand(16),
    2032             :       N->getOperand(17),
    2033             :       N->getOperand(18),
    2034         272 :     };
    2035         272 :     NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    2036         544 :     return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
    2037             :   }
    2038             :   default: break;
    2039             :   }
    2040             : 
    2041      189125 :   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    2042             : }
    2043             : 
    2044      240510 : bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
    2045             :                                      SDValue &Src, SDValue &Neg, SDValue &Abs,
    2046             :                                      SDValue &Sel, SDValue &Imm,
    2047             :                                      SelectionDAG &DAG) const {
    2048      240510 :   const R600InstrInfo *TII = Subtarget->getInstrInfo();
    2049      481020 :   if (!Src.isMachineOpcode())
    2050             :     return false;
    2051             : 
    2052      154468 :   switch (Src.getMachineOpcode()) {
    2053         125 :   case R600::FNEG_R600:
    2054         125 :     if (!Neg.getNode())
    2055             :       return false;
    2056         101 :     Src = Src.getOperand(0);
    2057         202 :     Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    2058         101 :     return true;
    2059         109 :   case R600::FABS_R600:
    2060         109 :     if (!Abs.getNode())
    2061             :       return false;
    2062          93 :     Src = Src.getOperand(0);
    2063         186 :     Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    2064          93 :     return true;
    2065       10610 :   case R600::CONST_COPY: {
    2066       10610 :     unsigned Opcode = ParentNode->getMachineOpcode();
    2067       10610 :     bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    2068             : 
    2069       10610 :     if (!Sel.getNode())
    2070             :       return false;
    2071             : 
    2072       18530 :     SDValue CstOffset = Src.getOperand(0);
    2073       27795 :     if (ParentNode->getValueType(0).isVector())
    2074             :       return false;
    2075             : 
    2076             :     // Gather constant values
    2077             :     int SrcIndices[] = {
    2078        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src0),
    2079        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src1),
    2080        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src2),
    2081        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src0_X),
    2082        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
    2083        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
    2084        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src0_W),
    2085        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src1_X),
    2086        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
    2087        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
    2088        9265 :       TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    2089      101915 :     };
    2090             :     std::vector<unsigned> Consts;
    2091      213095 :     for (int OtherSrcIdx : SrcIndices) {
    2092      101915 :       int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
    2093      101915 :       if (OtherSrcIdx < 0 || OtherSelIdx < 0)
    2094       83153 :         continue;
    2095       18762 :       if (HasDst) {
    2096       18762 :         OtherSrcIdx--;
    2097       18762 :         OtherSelIdx--;
    2098             :       }
    2099             :       if (RegisterSDNode *Reg =
    2100       18762 :           dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
    2101         726 :         if (Reg->getReg() == R600::ALU_CONST) {
    2102             :           ConstantSDNode *Cst
    2103         557 :             = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
    2104        1671 :           Consts.push_back(Cst->getZExtValue());
    2105             :         }
    2106             :       }
    2107             :     }
    2108             : 
    2109             :     ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    2110       27795 :     Consts.push_back(Cst->getZExtValue());
    2111        9265 :     if (!TII->fitsConstReadLimitations(Consts)) {
    2112             :       return false;
    2113             :     }
    2114             : 
    2115        9245 :     Sel = CstOffset;
    2116        9245 :     Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
    2117        9245 :     return true;
    2118             :   }
    2119             :   case R600::MOV_IMM_GLOBAL_ADDR:
    2120             :     // Check if the Imm slot is used. Taken from below.
    2121          28 :     if (cast<ConstantSDNode>(Imm)->getZExtValue())
    2122             :       return false;
    2123          14 :     Imm = Src.getOperand(0);
    2124          14 :     Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
    2125          14 :     return true;
    2126       27387 :   case R600::MOV_IMM_I32:
    2127             :   case R600::MOV_IMM_F32: {
    2128             :     unsigned ImmReg = R600::ALU_LITERAL_X;
    2129             :     uint64_t ImmValue = 0;
    2130             : 
    2131       27387 :     if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
    2132             :       ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
    2133        1056 :       float FloatValue = FPC->getValueAPF().convertToFloat();
    2134         528 :       if (FloatValue == 0.0) {
    2135             :         ImmReg = R600::ZERO;
    2136         386 :       } else if (FloatValue == 0.5) {
    2137             :         ImmReg = R600::HALF;
    2138         349 :       } else if (FloatValue == 1.0) {
    2139             :         ImmReg = R600::ONE;
    2140             :       } else {
    2141         792 :         ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
    2142             :       }
    2143             :     } else {
    2144             :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
    2145       26859 :       uint64_t Value = C->getZExtValue();
    2146       26859 :       if (Value == 0) {
    2147             :         ImmReg = R600::ZERO;
    2148       25166 :       } else if (Value == 1) {
    2149             :         ImmReg = R600::ONE_INT;
    2150             :       } else {
    2151             :         ImmValue = Value;
    2152             :       }
    2153             :     }
    2154             : 
    2155             :     // Check that we aren't already using an immediate.
    2156             :     // XXX: It's possible for an instruction to have more than one
    2157             :     // immediate operand, but this is not supported yet.
    2158             :     if (ImmReg == R600::ALU_LITERAL_X) {
    2159       22559 :       if (!Imm.getNode())
    2160             :         return false;
    2161             :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
    2162             :       assert(C);
    2163       45006 :       if (C->getZExtValue())
    2164             :         return false;
    2165       40518 :       Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
    2166             :     }
    2167       25087 :     Src = DAG.getRegister(ImmReg, MVT::i32);
    2168       25087 :     return true;
    2169             :   }
    2170             :   default:
    2171             :     return false;
    2172             :   }
    2173             : }
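                     : // Editor's illustrative sketch (not part of the original source; the opcode
                     : // is only an example): for an ALU node such as
                     : //   MUL_IEEE (FNEG_R600 %a), (MOV_IMM_F32 0.5)
                     : // FoldOperand can rewrite the first source to %a with its neg modifier
                     : // operand set to 1, and the immediate source to the inline HALF register,
                     : // so neither a separate FNEG instruction nor a literal slot is needed.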
    2174             : 
    2175             : /// Fold the instructions after selecting them
    2176      162139 : SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
    2177             :                                             SelectionDAG &DAG) const {
    2178      162139 :   const R600InstrInfo *TII = Subtarget->getInstrInfo();
    2179      162139 :   if (!Node->isMachineOpcode())
    2180             :     return Node;
    2181             : 
    2182             :   unsigned Opcode = Node->getMachineOpcode();
    2183      162139 :   SDValue FakeOp;
    2184             : 
    2185      162139 :   std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
    2186             : 
    2187      162139 :   if (Opcode == R600::DOT_4) {
    2188             :     int OperandIdx[] = {
    2189         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_X),
    2190         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
    2191         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
    2192         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_W),
    2193         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_X),
    2194         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
    2195         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
    2196         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    2197        1184 :         };
    2198             :     int NegIdx[] = {
    2199         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
    2200         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
    2201         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
    2202         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
    2203         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
    2204         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
    2205         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
    2206         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    2207        1184 :     };
    2208             :     int AbsIdx[] = {
    2209         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
    2210         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
    2211         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
    2212         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
    2213         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
    2214         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
    2215         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
    2216         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    2217        1184 :     };
    2218        1992 :     for (unsigned i = 0; i < 8; i++) {
    2219        1002 :       if (OperandIdx[i] < 0)
    2220          80 :         return Node;
    2221        1002 :       SDValue &Src = Ops[OperandIdx[i] - 1];
    2222        1002 :       SDValue &Neg = Ops[NegIdx[i] - 1];
    2223        1002 :       SDValue &Abs = Ops[AbsIdx[i] - 1];
    2224        1002 :       bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    2225        1002 :       int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
    2226        1002 :       if (HasDst)
    2227        1002 :         SelIdx--;
    2228        1002 :       SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
    2229        1002 :       if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
    2230         240 :         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    2231             :     }
    2232      161991 :   } else if (Opcode == R600::REG_SEQUENCE) {
    2233       49848 :     for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
    2234       20333 :       SDValue &Src = Ops[i];
    2235       20333 :       if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
    2236        3885 :         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    2237             :     }
    2238             :   } else {
    2239      156105 :     if (!TII->hasInstrModifiers(Opcode))
    2240      141782 :       return Node;
    2241             :     int OperandIdx[] = {
    2242      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src0),
    2243      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src1),
    2244      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src2)
    2245      318099 :     };
    2246             :     int NegIdx[] = {
    2247      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
    2248      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
    2249      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    2250      318099 :     };
    2251      106033 :     int AbsIdx[] = {
    2252      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
    2253      106033 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
    2254             :       -1
    2255      212066 :     };
    2256      478053 :     for (unsigned i = 0; i < 3; i++) {
    2257      277720 :       if (OperandIdx[i] < 0)
    2258       91710 :         return Node;
    2259      219175 :       SDValue &Src = Ops[OperandIdx[i] - 1];
    2260      219175 :       SDValue &Neg = Ops[NegIdx[i] - 1];
    2261      219175 :       SDValue FakeAbs;
    2262      219175 :       SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
    2263      219175 :       bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    2264      219175 :       int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
    2265      219175 :       int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
    2266      219175 :       if (HasDst) {
    2267      219175 :         SelIdx--;
    2268      219175 :         ImmIdx--;
    2269             :       }
    2270      219175 :       SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
    2271      219175 :       SDValue &Imm = Ops[ImmIdx];
    2272      219175 :       if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
    2273       99495 :         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    2274             :     }
    2275             :   }
    2276             : 
    2277             :   return Node;
    2278             : }

Generated by: LCOV version 1.13