LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600ISelLowering.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 836 948 88.2 %
Date: 2018-10-20 13:21:21 Functions: 36 40 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Custom DAG lowering for R600
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "R600ISelLowering.h"
      16             : #include "AMDGPUFrameLowering.h"
      17             : #include "AMDGPUSubtarget.h"
      18             : #include "R600Defines.h"
      19             : #include "R600FrameLowering.h"
      20             : #include "R600InstrInfo.h"
      21             : #include "R600MachineFunctionInfo.h"
      22             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      23             : #include "Utils/AMDGPUBaseInfo.h"
      24             : #include "llvm/ADT/APFloat.h"
      25             : #include "llvm/ADT/APInt.h"
      26             : #include "llvm/ADT/ArrayRef.h"
      27             : #include "llvm/ADT/DenseMap.h"
      28             : #include "llvm/ADT/SmallVector.h"
      29             : #include "llvm/CodeGen/CallingConvLower.h"
      30             : #include "llvm/CodeGen/DAGCombine.h"
      31             : #include "llvm/CodeGen/ISDOpcodes.h"
      32             : #include "llvm/CodeGen/MachineBasicBlock.h"
      33             : #include "llvm/CodeGen/MachineFunction.h"
      34             : #include "llvm/CodeGen/MachineInstr.h"
      35             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      36             : #include "llvm/CodeGen/MachineMemOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/SelectionDAG.h"
      39             : #include "llvm/IR/Constants.h"
      40             : #include "llvm/IR/DerivedTypes.h"
      41             : #include "llvm/Support/Casting.h"
      42             : #include "llvm/Support/Compiler.h"
      43             : #include "llvm/Support/ErrorHandling.h"
      44             : #include "llvm/Support/MachineValueType.h"
      45             : #include <cassert>
      46             : #include <cstdint>
      47             : #include <iterator>
      48             : #include <utility>
      49             : #include <vector>
      50             : 
      51             : using namespace llvm;
      52             : 
      53             : #include "R600GenCallingConv.inc"
      54             : 
       55         291 : R600TargetLowering::R600TargetLowering(const TargetMachine &TM, // Constructor: registers the R600 register classes and records, per opcode and value type, how each operation is to be legalized.
       56         291 :                                        const R600Subtarget &STI)
       57         291 :     : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
       58             :   addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); // f32/i32 share Reg32; the 2- and 4-element vectors below use Reg64 and Reg128
       59             :   addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
       60             :   addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
       61             :   addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
       62             :   addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
       63             :   addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
       64             : 
       65         582 :   computeRegisterProperties(Subtarget->getRegisterInfo());
       66             : 
       67             :   // Legalize loads and stores to the private address space.
       68             :   setOperationAction(ISD::LOAD, MVT::i32, Custom);
       69             :   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
       70             :   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
       71             : 
       72             :   // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
       73             :   // spaces, so it is custom lowered to handle those where it isn't.
       74        2037 :   for (MVT VT : MVT::integer_valuetypes()) { // for every integer result type: i1 extending loads are Promoted, i8/i16 ones are Custom
       75             :     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
       76             :     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
       77             :     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
       78             : 
       79             :     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
       80             :     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
       81             :     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
       82             : 
       83             :     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
       84             :     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
       85             :     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
       86             :   }
       87             : 
       88             :   // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
       89             :   setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
       90             :   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
       91             :   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
       92             : 
       93             :   setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
       94             :   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
       95             :   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
       96             : 
       97             :   setOperationAction(ISD::STORE, MVT::i8, Custom);
       98             :   setOperationAction(ISD::STORE, MVT::i32, Custom);
       99             :   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
      100             :   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
      101             : 
      102             :   setTruncStoreAction(MVT::i32, MVT::i8, Custom);
      103             :   setTruncStoreAction(MVT::i32, MVT::i16, Custom);
      104             :   // We need to include these since trunc STORES to PRIVATE need
      105             :   // special handling to accommodate RMW
      106             :   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
      107             :   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
      108             :   setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
      109             :   setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
      110             :   setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
      111             :   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
      112             :   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
      113             :   setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
      114             :   setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
      115             :   setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
      116             : 
      117             :   // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
      118             :   setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
      119             :   setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
      120             : 
      121             :   // Set condition code actions
      122             :   setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
      123             :   setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);
      124             :   setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
      125             :   setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
      126             :   setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
      127             :   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
      128             :   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
      129             :   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
      130             :   setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
      131             :   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
      132             :   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
      133             :   setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
      134             : 
      135             :   setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
      136             :   setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
      137             :   setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
      138             :   setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
      139             : 
      140             :   setOperationAction(ISD::FCOS, MVT::f32, Custom);
      141             :   setOperationAction(ISD::FSIN, MVT::f32, Custom);
      142             : 
      143             :   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
      144             :   setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
      145             : 
      146             :   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
      147             :   setOperationAction(ISD::BR_CC, MVT::f32, Expand);
      148             :   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
      149             : 
      150             :   setOperationAction(ISD::FSUB, MVT::f32, Expand);
      151             : 
      152             :   setOperationAction(ISD::FCEIL, MVT::f64, Custom);
      153             :   setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
      154             :   setOperationAction(ISD::FRINT, MVT::f64, Custom);
      155             :   setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
      156             : 
      157             :   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
      158             :   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
      159             : 
      160             :   setOperationAction(ISD::SETCC, MVT::i32, Expand);
      161             :   setOperationAction(ISD::SETCC, MVT::f32, Expand);
      162             :   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
      163             :   setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
      164             :   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      165             :   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      166             : 
      167             :   setOperationAction(ISD::SELECT, MVT::i32, Expand);
      168             :   setOperationAction(ISD::SELECT, MVT::f32, Expand);
      169             :   setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
      170             :   setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
      171             : 
      172             :   // ADD, SUB overflow.
      173             :   // TODO: turn these into Legal?
      174         582 :   if (Subtarget->hasCARRY()) // UADDO is marked Custom only when the subtarget reports CARRY
      175             :     setOperationAction(ISD::UADDO, MVT::i32, Custom);
      176             : 
      177         291 :   if (Subtarget->hasBORROW()) // USUBO is marked Custom only when the subtarget reports BORROW
      178             :     setOperationAction(ISD::USUBO, MVT::i32, Custom);
      179             : 
      180             :   // Expand sign extension of vectors
      181         291 :   if (!Subtarget->hasBFE()) // scalar i1/i8/i16 SIGN_EXTEND_INREG are expanded only without BFE; the vector forms are always expanded
      182             :     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
      183             : 
      184             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
      185             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
      186             : 
      187         291 :   if (!Subtarget->hasBFE())
      188             :     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
      189             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
      190             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
      191             : 
      192         291 :   if (!Subtarget->hasBFE())
      193             :     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
      194             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
      195             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
      196             : 
      197             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
      198             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
      199             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
      200             : 
      201             :   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
      202             : 
      203             :   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
      204             : 
      205             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
      206             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
      207             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
      208             :   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
      209             : 
      210             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
      211             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
      212             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      213             :   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
      214             : 
      215             :   // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
      216             :   //  to be Legal/Custom in order to avoid library calls.
      217             :   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
      218             :   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
      219             :   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
      220             : 
      221         291 :   if (!Subtarget->hasFMA()) { // without FMA support, FMA is expanded for both f32 and f64
      222             :     setOperationAction(ISD::FMA, MVT::f32, Expand);
      223             :     setOperationAction(ISD::FMA, MVT::f64, Expand);
      224             :   }
      225             : 
      226             :   // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
      227             :   // need it for R600.
      228         291 :   if (!Subtarget->hasFP32Denormals())
      229             :     setOperationAction(ISD::FMAD, MVT::f32, Legal);
      230             : 
      231         291 :   if (!Subtarget->hasBFI()) {
      232             :     // fcopysign can be done in a single instruction with BFI.
      233             :     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
      234             :     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
      235             :   }
      236             : 
      237         291 :   if (!Subtarget->hasBCNT(32))
      238             :     setOperationAction(ISD::CTPOP, MVT::i32, Expand);
      239             : 
      240             :   if (!Subtarget->hasBCNT(64))
      241             :     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
      242             : 
      243         291 :   if (Subtarget->hasFFBH())
      244             :     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
      245             : 
      246         291 :   if (Subtarget->hasFFBL())
      247             :     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
      248             : 
      249             :   // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
      250             :   // need it for R600.
      251         291 :   if (Subtarget->hasBFE())
      252             :     setHasExtractBitsInsn(true);
      253             : 
      254             :   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
      255             : 
      256         291 :   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; // scalar integer types whose ADDC/SUBC/ADDE/SUBE are expanded below
      257         873 :   for (MVT VT : ScalarIntVTs) {
      258             :     setOperationAction(ISD::ADDC, VT, Expand);
      259             :     setOperationAction(ISD::SUBC, VT, Expand);
      260             :     setOperationAction(ISD::ADDE, VT, Expand);
      261             :     setOperationAction(ISD::SUBE, VT, Expand);
      262             :   }
      263             : 
      264             :   // LLVM will expand these to atomic_cmp_swap(0)
      265             :   // and atomic_swap, respectively.
      266             :   setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
      267             :   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
      268             : 
      269             :   // We need to custom lower some of the intrinsics
      270             :   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
      271             :   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
      272             : 
      273             :   setSchedulingPreference(Sched::Source);
      274             : 
      275             :   setTargetDAGCombine(ISD::FP_ROUND); // opcodes registered for target DAG combining
      276             :   setTargetDAGCombine(ISD::FP_TO_SINT);
      277             :   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
      278             :   setTargetDAGCombine(ISD::SELECT_CC);
      279             :   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
      280             :   setTargetDAGCombine(ISD::LOAD);
      281         291 : }
     282             : 
      283        2516 : static inline bool isEOP(MachineBasicBlock::iterator I) { // Returns true iff the instruction immediately after I in its block is R600::RETURN.
      284        2516 :   if (std::next(I) == I->getParent()->end()) // I is the last instruction of its block: there is no successor to inspect
      285             :     return false;
      286        5016 :   return std::next(I)->getOpcode() == R600::RETURN;
      287             : }
     288             : 
      289             : MachineBasicBlock * // Rewrites MI according to its opcode. Cases that `break` fall through to the end, where MI is erased; cases that `return BB` early leave MI in place. BB is always the returned block, except on the path delegated to AMDGPUTargetLowering.
      290        9301 : R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
      291             :                                                 MachineBasicBlock *BB) const {
      292        9301 :   MachineFunction *MF = BB->getParent();
      293        9301 :   MachineRegisterInfo &MRI = MF->getRegInfo();
      294             :   MachineBasicBlock::iterator I = MI;
      295        9301 :   const R600InstrInfo *TII = Subtarget->getInstrInfo();
      296             : 
      297       18602 :   switch (MI.getOpcode()) {
      298         912 :   default:
      299             :     // Replace LDS_*_RET instructions that don't have any uses with the
      300             :     // equivalent LDS_*_NORET instruction.
      301         912 :     if (TII->isLDSRetInstr(MI.getOpcode())) {
      302        1824 :       int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      303             :       assert(DstIdx != -1);
      304             :       MachineInstrBuilder NewMI;
      305             :       // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      306             :       //        LDS_1A2D support and remove this special case.
      307        1824 :       if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
      308          30 :           MI.getOpcode() == R600::LDS_CMPST_RET)
      309             :         return BB; // keep the _RET form untouched: its result is used, or it is LDS_CMPST_RET
      310             : 
      311          30 :       NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
      312          30 :                       TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      313         300 :       for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { // copy every operand except operand 0 onto the NORET instruction
      314         270 :         NewMI.add(MI.getOperand(i));
      315             :       }
      316             :     } else {
      317           0 :       return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
      318             :     }
      319             :     break;
      320             : 
      321          20 :   case R600::FABS_R600: { // emitted as a MOV with MO_FLAG_ABS added
      322          20 :     MachineInstr *NewMI = TII->buildDefaultInstruction(
      323             :         *BB, I, R600::MOV, MI.getOperand(0).getReg(),
      324          20 :         MI.getOperand(1).getReg());
      325          20 :     TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
      326          20 :     break;
      327             :   }
      328             : 
      329          20 :   case R600::FNEG_R600: { // emitted as a MOV with MO_FLAG_NEG added
      330          20 :     MachineInstr *NewMI = TII->buildDefaultInstruction(
      331             :         *BB, I, R600::MOV, MI.getOperand(0).getReg(),
      332          20 :         MI.getOperand(1).getReg());
      333          20 :     TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
      334          20 :     break;
      335             :   }
      336             : 
      337           0 :   case R600::MASK_WRITE: { // no new instruction: MO_FLAG_MASK is added to the instruction defining the virtual register
      338           0 :     unsigned maskedRegister = MI.getOperand(0).getReg();
      339             :     assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      340           0 :     MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      341           0 :     TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
      342           0 :     break;
      343             :   }
      344             : 
      345          17 :   case R600::MOV_IMM_F32: // the FP immediate is passed to buildMovImm as its raw bit pattern
      346          17 :     TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
      347             :                                                             .getFPImm()
      348          17 :                                                             ->getValueAPF()
      349          17 :                                                             .bitcastToAPInt()
      350             :                                                             .getZExtValue());
      351          17 :     break;
      352             : 
      353         545 :   case R600::MOV_IMM_I32:
      354         545 :     TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
      355         545 :                      MI.getOperand(1).getImm());
      356         545 :     break;
      357             : 
      358           1 :   case R600::MOV_IMM_GLOBAL_ADDR: {
      359             :     //TODO: Perhaps combine this instruction with the next if possible
      360             :     auto MIB = TII->buildDefaultInstruction(
      361           2 :         *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
      362           1 :     int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
      363             :     //TODO: Ugh this is rather ugly
      364           2 :     MIB->getOperand(Idx) = MI.getOperand(1); // overwrite the MOV's literal operand with MI's operand 1
      365             :     break;
      366             :   }
      367             : 
      368        2754 :   case R600::CONST_COPY: { // MOV from ALU_CONST, with src0_sel set to MI's immediate operand 1
      369        2754 :     MachineInstr *NewMI = TII->buildDefaultInstruction(
      370        5508 :         *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
      371        2754 :     TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
      372        2754 :                        MI.getOperand(1).getImm());
      373        2754 :     break;
      374             :   }
      375             : 
      376        2454 :   case R600::RAT_WRITE_CACHELESS_32_eg: // re-emitted with the same opcode plus an extra immediate taken from isEOP(I)
      377             :   case R600::RAT_WRITE_CACHELESS_64_eg:
      378             :   case R600::RAT_WRITE_CACHELESS_128_eg:
      379        2454 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
      380        2454 :         .add(MI.getOperand(0))
      381        2454 :         .add(MI.getOperand(1))
      382        2454 :         .addImm(isEOP(I)); // Set End of program bit
      383        2454 :     break;
      384             : 
      385           2 :   case R600::RAT_STORE_TYPED_eg:
      386           2 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
      387           2 :         .add(MI.getOperand(0))
      388           2 :         .add(MI.getOperand(1))
      389           2 :         .add(MI.getOperand(2))
      390           2 :         .addImm(isEOP(I)); // Set End of program bit
      391           2 :     break;
      392             : 
      393         135 :   case R600::BRANCH: // replaced by a JUMP carrying MI's operand 0
      394         270 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
      395         135 :         .add(MI.getOperand(0));
      396         135 :     break;
      397             : 
      398           0 :   case R600::BRANCH_COND_f32: { // PRED_X (PRED_SETNE) defining PREDICATE_BIT, followed by a JUMP_COND that kills it
      399             :     MachineInstr *NewMI =
      400           0 :         BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
      401           0 :                 R600::PREDICATE_BIT)
      402           0 :             .add(MI.getOperand(1))
      403             :             .addImm(R600::PRED_SETNE)
      404             :             .addImm(0); // Flags
      405           0 :     TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
      406           0 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
      407           0 :         .add(MI.getOperand(0))
      408           0 :         .addReg(R600::PREDICATE_BIT, RegState::Kill);
      409           0 :     break;
      410             :   }
      411             : 
      412          86 :   case R600::BRANCH_COND_i32: { // same sequence as the f32 case, but using PRED_SETNE_INT
      413             :     MachineInstr *NewMI =
      414          86 :         BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
      415          86 :                 R600::PREDICATE_BIT)
      416          86 :             .add(MI.getOperand(1))
      417             :             .addImm(R600::PRED_SETNE_INT)
      418             :             .addImm(0); // Flags
      419          86 :     TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
      420         172 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
      421          86 :         .add(MI.getOperand(0))
      422          86 :         .addReg(R600::PREDICATE_BIT, RegState::Kill);
      423          86 :     break;
      424             :   }
      425             : 
      426          60 :   case R600::EG_ExportSwz:
      427             :   case R600::R600_ExportSwz: {
      428             :     // Instruction is left unmodified if it's not the last one of its type
      429             :     bool isLastInstructionOfItsType = true;
      430          60 :     unsigned InstExportType = MI.getOperand(1).getImm();
      431          60 :     for (MachineBasicBlock::iterator NextExportInst = std::next(I), // scan the rest of the block for a later export whose operand 1 has the same value
      432         189 :          EndBlock = BB->end(); NextExportInst != EndBlock;
      433         129 :          NextExportInst = std::next(NextExportInst)) {
      434         276 :       if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
      435             :           NextExportInst->getOpcode() == R600::R600_ExportSwz) {
      436          23 :         unsigned CurrentInstExportType = NextExportInst->getOperand(1)
      437          23 :             .getImm();
      438          23 :         if (CurrentInstExportType == InstExportType) {
      439             :           isLastInstructionOfItsType = false;
      440             :           break;
      441             :         }
      442             :       }
      443             :     }
      444          60 :     bool EOP = isEOP(I);
      445          60 :     if (!EOP && !isLastInstructionOfItsType)
      446             :       return BB; // MI stays in place, not erased
      447         102 :     unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40; // NOTE(review): 84/40 look like the CF export instruction encodings for EG vs. R600 - confirm against the ISA docs
      448          51 :     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
      449          51 :         .add(MI.getOperand(0))
      450          51 :         .add(MI.getOperand(1))
      451          51 :         .add(MI.getOperand(2))
      452          51 :         .add(MI.getOperand(3))
      453          51 :         .add(MI.getOperand(4))
      454          51 :         .add(MI.getOperand(5))
      455          51 :         .add(MI.getOperand(6))
      456          51 :         .addImm(CfInst)
      457          51 :         .addImm(EOP);
      458          51 :     break;
      459             :   }
      460             :   case R600::RETURN: { // RETURN is left as is
      461             :     return BB;
      462             :   }
      463             :   }
      464             : 
      465        6115 :   MI.eraseFromParent(); // reached by every case that `break`s: the original MI is removed
      466        6115 :   return BB;
      467             : }
     468             : 
     469             : //===----------------------------------------------------------------------===//
     470             : // Custom DAG Lowering Operations
     471             : //===----------------------------------------------------------------------===//
     472             : 
     473      100158 : SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     474      100158 :   MachineFunction &MF = DAG.getMachineFunction();
     475      100158 :   R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
     476      100158 :   switch (Op.getOpcode()) {
     477         440 :   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
     478       10478 :   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
     479           7 :   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
     480          50 :   case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
     481          28 :   case ISD::SRA_PARTS:
     482          28 :   case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
     483          64 :   case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
     484         620 :   case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
     485          17 :   case ISD::FCOS:
     486          17 :   case ISD::FSIN: return LowerTrig(Op, DAG);
     487       16360 :   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
     488       33975 :   case ISD::STORE: return LowerSTORE(Op, DAG);
     489       35619 :   case ISD::LOAD: {
     490       35619 :     SDValue Result = LowerLOAD(Op, DAG);
     491             :     assert((!Result.getNode() ||
     492             :             Result.getNode()->getNumValues() == 2) &&
     493             :            "Load should return a value and a chain");
     494       35619 :     return Result;
     495             :   }
     496             : 
     497          86 :   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
     498          57 :   case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
     499        1606 :   case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
     500          82 :   case ISD::INTRINSIC_VOID: {
     501          82 :     SDValue Chain = Op.getOperand(0);
     502             :     unsigned IntrinsicID =
     503         164 :                          cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
     504             :     switch (IntrinsicID) {
     505          60 :     case Intrinsic::r600_store_swizzle: {
     506             :       SDLoc DL(Op);
     507             :       const SDValue Args[8] = {
     508             :         Chain,
     509             :         Op.getOperand(2), // Export Value
     510             :         Op.getOperand(3), // ArrayBase
     511             :         Op.getOperand(4), // Type
     512          60 :         DAG.getConstant(0, DL, MVT::i32), // SWZ_X
     513          60 :         DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
     514          60 :         DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
     515          60 :         DAG.getConstant(3, DL, MVT::i32) // SWZ_W
     516         120 :       };
     517          60 :       return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
     518             :     }
     519             : 
     520             :     // default for switch(IntrinsicID)
     521             :     default: break;
     522             :     }
     523             :     // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
     524             :     break;
     525             :   }
     526         669 :   case ISD::INTRINSIC_WO_CHAIN: {
     527             :     unsigned IntrinsicID =
     528        1338 :                          cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
     529         669 :     EVT VT = Op.getValueType();
     530             :     SDLoc DL(Op);
     531         669 :     switch (IntrinsicID) {
     532         276 :     case Intrinsic::r600_tex:
     533             :     case Intrinsic::r600_texc: {
     534             :       unsigned TextureOp;
     535             :       switch (IntrinsicID) {
     536             :       case Intrinsic::r600_tex:
     537             :         TextureOp = 0;
     538             :         break;
     539           7 :       case Intrinsic::r600_texc:
     540             :         TextureOp = 1;
     541           7 :         break;
     542           0 :       default:
     543           0 :         llvm_unreachable("unhandled texture operation");
     544             :       }
     545             : 
     546             :       SDValue TexArgs[19] = {
     547         276 :         DAG.getConstant(TextureOp, DL, MVT::i32),
     548             :         Op.getOperand(1),
     549         276 :         DAG.getConstant(0, DL, MVT::i32),
     550         276 :         DAG.getConstant(1, DL, MVT::i32),
     551         276 :         DAG.getConstant(2, DL, MVT::i32),
     552         276 :         DAG.getConstant(3, DL, MVT::i32),
     553             :         Op.getOperand(2),
     554             :         Op.getOperand(3),
     555             :         Op.getOperand(4),
     556         276 :         DAG.getConstant(0, DL, MVT::i32),
     557         276 :         DAG.getConstant(1, DL, MVT::i32),
     558         276 :         DAG.getConstant(2, DL, MVT::i32),
     559         276 :         DAG.getConstant(3, DL, MVT::i32),
     560             :         Op.getOperand(5),
     561             :         Op.getOperand(6),
     562             :         Op.getOperand(7),
     563             :         Op.getOperand(8),
     564             :         Op.getOperand(9),
     565             :         Op.getOperand(10)
     566         828 :       };
     567         276 :       return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
     568             :     }
     569          32 :     case Intrinsic::r600_dot4: {
     570             :       SDValue Args[8] = {
     571             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     572          32 :           DAG.getConstant(0, DL, MVT::i32)),
     573             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     574          32 :           DAG.getConstant(0, DL, MVT::i32)),
     575             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     576          32 :           DAG.getConstant(1, DL, MVT::i32)),
     577             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     578          32 :           DAG.getConstant(1, DL, MVT::i32)),
     579             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     580          32 :           DAG.getConstant(2, DL, MVT::i32)),
     581             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     582          32 :           DAG.getConstant(2, DL, MVT::i32)),
     583             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
     584          32 :           DAG.getConstant(3, DL, MVT::i32)),
     585             :       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
     586          32 :           DAG.getConstant(3, DL, MVT::i32))
     587             :       };
     588          32 :       return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
     589             :     }
     590             : 
     591           2 :     case Intrinsic::r600_implicitarg_ptr: {
     592           2 :       MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
     593           2 :       uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
     594           2 :       return DAG.getConstant(ByteOffset, DL, PtrVT);
     595             :     }
     596           1 :     case Intrinsic::r600_read_ngroups_x:
     597           1 :       return LowerImplicitParameter(DAG, VT, DL, 0);
     598           1 :     case Intrinsic::r600_read_ngroups_y:
     599           1 :       return LowerImplicitParameter(DAG, VT, DL, 1);
     600           1 :     case Intrinsic::r600_read_ngroups_z:
     601           1 :       return LowerImplicitParameter(DAG, VT, DL, 2);
     602           2 :     case Intrinsic::r600_read_global_size_x:
     603           2 :       return LowerImplicitParameter(DAG, VT, DL, 3);
     604           2 :     case Intrinsic::r600_read_global_size_y:
     605           2 :       return LowerImplicitParameter(DAG, VT, DL, 4);
     606           2 :     case Intrinsic::r600_read_global_size_z:
     607           2 :       return LowerImplicitParameter(DAG, VT, DL, 5);
     608           8 :     case Intrinsic::r600_read_local_size_x:
     609           8 :       return LowerImplicitParameter(DAG, VT, DL, 6);
     610          36 :     case Intrinsic::r600_read_local_size_y:
     611          36 :       return LowerImplicitParameter(DAG, VT, DL, 7);
     612          36 :     case Intrinsic::r600_read_local_size_z:
     613          36 :       return LowerImplicitParameter(DAG, VT, DL, 8);
     614             : 
     615           4 :     case Intrinsic::r600_read_tgid_x:
     616             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     617           4 :                                      R600::T1_X, VT);
     618           3 :     case Intrinsic::r600_read_tgid_y:
     619             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     620           3 :                                      R600::T1_Y, VT);
     621           3 :     case Intrinsic::r600_read_tgid_z:
     622             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     623           3 :                                      R600::T1_Z, VT);
     624         184 :     case Intrinsic::r600_read_tidig_x:
     625             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     626         184 :                                      R600::T0_X, VT);
     627          32 :     case Intrinsic::r600_read_tidig_y:
     628             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     629          32 :                                      R600::T0_Y, VT);
     630          32 :     case Intrinsic::r600_read_tidig_z:
     631             :       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
     632          32 :                                      R600::T0_Z, VT);
     633             : 
     634             :     case Intrinsic::r600_recipsqrt_ieee:
     635           3 :       return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
     636             : 
     637             :     case Intrinsic::r600_recipsqrt_clamped:
     638           5 :       return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
     639           4 :     default:
     640           4 :       return Op;
     641             :     }
     642             : 
     643             :     // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
     644             :     break;
     645             :   }
     646             :   } // end switch(Op.getOpcode())
     647          22 :   return SDValue();
     648             : }
     649             : 
     650          95 : void R600TargetLowering::ReplaceNodeResults(SDNode *N,
     651             :                                             SmallVectorImpl<SDValue> &Results,
     652             :                                             SelectionDAG &DAG) const {
     653         190 :   switch (N->getOpcode()) {
     654          43 :   default:
     655          43 :     AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
     656          43 :     return;
     657             :   case ISD::FP_TO_UINT:
     658           9 :     if (N->getValueType(0) == MVT::i1) {
     659           4 :       Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
     660           2 :       return;
     661             :     }
     662             :     // Since we don't care about out of bounds values we can use FP_TO_SINT for
     663             :     // uints too. The DAGLegalizer code for uint considers some extra cases
     664             :     // which are not necessary here.
     665             :     LLVM_FALLTHROUGH;
     666             :   case ISD::FP_TO_SINT: {
     667          16 :     if (N->getValueType(0) == MVT::i1) {
     668           4 :       Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
     669           2 :       return;
     670             :     }
     671             : 
     672          14 :     SDValue Result;
     673          14 :     if (expandFP_TO_SINT(N, Result, DAG))
     674          14 :       Results.push_back(Result);
     675             :     return;
     676             :   }
     677             :   case ISD::SDIVREM: {
     678             :     SDValue Op = SDValue(N, 1);
     679          12 :     SDValue RES = LowerSDIVREM(Op, DAG);
     680          12 :     Results.push_back(RES);
     681          12 :     Results.push_back(RES.getValue(1));
     682             :     break;
     683             :   }
     684             :   case ISD::UDIVREM: {
     685             :     SDValue Op = SDValue(N, 0);
     686          22 :     LowerUDIVREM64(Op, DAG, Results);
     687             :     break;
     688             :   }
     689             :   }
     690             : }
     691             : 
     692          16 : SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
     693             :                                                    SDValue Vector) const {
     694             :   SDLoc DL(Vector);
     695          16 :   EVT VecVT = Vector.getValueType();
     696          16 :   EVT EltVT = VecVT.getVectorElementType();
     697             :   SmallVector<SDValue, 8> Args;
     698             : 
     699          64 :   for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
     700          48 :     Args.push_back(DAG.getNode(
     701             :         ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
     702          96 :         DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
     703             :   }
     704             : 
     705          16 :   return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
     706             : }
     707             : 
     708       10478 : SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     709             :                                                     SelectionDAG &DAG) const {
     710             :   SDLoc DL(Op);
     711       10478 :   SDValue Vector = Op.getOperand(0);
     712       10478 :   SDValue Index = Op.getOperand(1);
     713             : 
     714          42 :   if (isa<ConstantSDNode>(Index) ||
     715             :       Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
     716       10464 :     return Op;
     717             : 
     718          14 :   Vector = vectorToVerticalVector(DAG, Vector);
     719             :   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
     720          14 :                      Vector, Index);
     721             : }
     722             : 
     723           7 : SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     724             :                                                    SelectionDAG &DAG) const {
     725             :   SDLoc DL(Op);
     726           7 :   SDValue Vector = Op.getOperand(0);
     727           7 :   SDValue Value = Op.getOperand(1);
     728           7 :   SDValue Index = Op.getOperand(2);
     729             : 
     730           3 :   if (isa<ConstantSDNode>(Index) ||
     731             :       Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
     732           6 :     return Op;
     733             : 
     734           1 :   Vector = vectorToVerticalVector(DAG, Vector);
     735             :   SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
     736           1 :                                Vector, Value, Index);
     737           1 :   return vectorToVerticalVector(DAG, Insert);
     738             : }
     739             : 
     740          57 : SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
     741             :                                                SDValue Op,
     742             :                                                SelectionDAG &DAG) const {
     743             :   GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
     744          57 :   if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
     745          42 :     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
     746             : 
     747          15 :   const DataLayout &DL = DAG.getDataLayout();
     748          15 :   const GlobalValue *GV = GSD->getGlobal();
     749             :   MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
     750             : 
     751          15 :   SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
     752          30 :   return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
     753             : }
     754             : 
     755          17 : SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
     756             :   // On hw >= R700, COS/SIN input must be between -1. and 1.
     757             :   // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
     758          17 :   EVT VT = Op.getValueType();
     759          17 :   SDValue Arg = Op.getOperand(0);
     760             :   SDLoc DL(Op);
     761             : 
     762             :   // TODO: Should this propagate fast-math-flags?
     763             :   SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
     764             :       DAG.getNode(ISD::FADD, DL, VT,
     765             :         DAG.getNode(ISD::FMUL, DL, VT, Arg,
     766             :           DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
     767          17 :         DAG.getConstantFP(0.5, DL, MVT::f32)));
     768             :   unsigned TrigNode;
     769          17 :   switch (Op.getOpcode()) {
     770             :   case ISD::FCOS:
     771             :     TrigNode = AMDGPUISD::COS_HW;
     772             :     break;
     773          11 :   case ISD::FSIN:
     774             :     TrigNode = AMDGPUISD::SIN_HW;
     775          11 :     break;
     776           0 :   default:
     777           0 :     llvm_unreachable("Wrong trig opcode");
     778             :   }
     779             :   SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
     780             :       DAG.getNode(ISD::FADD, DL, VT, FractPart,
     781          17 :         DAG.getConstantFP(-0.5, DL, MVT::f32)));
     782          17 :   if (Gen >= AMDGPUSubtarget::R700)
     783          17 :     return TrigVal;
     784             :   // On R600 hw, COS/SIN input must be between -Pi and Pi.
     785             :   return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
     786           0 :       DAG.getConstantFP(3.14159265359, DL, MVT::f32));
     787             : }
     788             : 
     789          50 : SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
     790             :   SDLoc DL(Op);
     791          50 :   EVT VT = Op.getValueType();
     792             : 
     793          50 :   SDValue Lo = Op.getOperand(0);
     794          50 :   SDValue Hi = Op.getOperand(1);
     795          50 :   SDValue Shift = Op.getOperand(2);
     796          50 :   SDValue Zero = DAG.getConstant(0, DL, VT);
     797          50 :   SDValue One  = DAG.getConstant(1, DL, VT);
     798             : 
     799          50 :   SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
     800          50 :   SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
     801          50 :   SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
     802          50 :   SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
     803             : 
     804             :   // The dance around Width1 is necessary for the special case Shift == 0.
     805             :   // Without it the CompShift might be 32, producing incorrect results in
     806             :   // Overflow. So we do the shift in two steps; the alternative is to
     807             :   // add a conditional to filter the special case.
     808             : 
     809          50 :   SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
     810          50 :   Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
     811             : 
     812          50 :   SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
     813          50 :   HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
     814          50 :   SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
     815             : 
     816          50 :   SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
     817          50 :   SDValue LoBig = Zero;
     818             : 
     819          50 :   Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
     820          50 :   Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
     821             : 
     822          50 :   return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
     823             : }
     824             : 
     825          28 : SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
     826             :   SDLoc DL(Op);
     827          28 :   EVT VT = Op.getValueType();
     828             : 
     829          28 :   SDValue Lo = Op.getOperand(0);
     830          28 :   SDValue Hi = Op.getOperand(1);
     831          28 :   SDValue Shift = Op.getOperand(2);
     832          28 :   SDValue Zero = DAG.getConstant(0, DL, VT);
     833          28 :   SDValue One  = DAG.getConstant(1, DL, VT);
     834             : 
     835             :   const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
     836             : 
     837          28 :   SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
     838          28 :   SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
     839          28 :   SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
     840          28 :   SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
     841             : 
     842             :   // The dance around Width1 is necessary for the special case Shift == 0.
     843             :   // Without it the CompShift might be 32, producing incorrect results in
     844             :   // Overflow. So we do the shift in two steps; the alternative is to
     845             :   // add a conditional to filter the special case.
     846             : 
     847          28 :   SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
     848          28 :   Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
     849             : 
     850          49 :   SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
     851          28 :   SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
     852          28 :   LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
     853             : 
     854          28 :   SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
     855          28 :   SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
     856             : 
     857          28 :   Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
     858          28 :   Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
     859             : 
     860          28 :   return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
     861             : }
     862             : 
     863         684 : SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
     864             :                                           unsigned mainop, unsigned ovf) const {
     865             :   SDLoc DL(Op);
     866         684 :   EVT VT = Op.getValueType();
     867             : 
     868         684 :   SDValue Lo = Op.getOperand(0);
     869         684 :   SDValue Hi = Op.getOperand(1);
     870             : 
     871         684 :   SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
     872             :   // Extend sign.
     873         684 :   OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
     874         684 :                     DAG.getValueType(MVT::i1));
     875             : 
     876         684 :   SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
     877             : 
     878         684 :   return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
     879             : }
     880             : 
     881           2 : SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
     882             :   SDLoc DL(Op);
     883             :   return DAG.getNode(
     884             :       ISD::SETCC,
     885             :       DL,
     886             :       MVT::i1,
     887             :       Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
     888           2 :       DAG.getCondCode(ISD::SETEQ));
     889             : }
     890             : 
     891           2 : SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
     892             :   SDLoc DL(Op);
     893             :   return DAG.getNode(
     894             :       ISD::SETCC,
     895             :       DL,
     896             :       MVT::i1,
     897             :       Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
     898           2 :       DAG.getCondCode(ISD::SETEQ));
     899             : }
     900             : 
     901          89 : SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
     902             :                                                    const SDLoc &DL,
     903             :                                                    unsigned DwordOffset) const {
     904          89 :   unsigned ByteOffset = DwordOffset * 4;
     905          89 :   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
     906             :                                       AMDGPUAS::PARAM_I_ADDRESS);
     907             : 
     908             :   // We shouldn't be using an offset wider than 16 bits for implicit parameters.
     909             :   assert(isInt<16>(ByteOffset));
     910             : 
     911             :   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
     912             :                      DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
     913         178 :                      MachinePointerInfo(ConstantPointerNull::get(PtrType)));
     914             : }
     915             : 
     916       21270 : bool R600TargetLowering::isZero(SDValue Op) const {
     917             :   if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
     918        9775 :     return Cst->isNullValue();
     919             :   } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
     920         444 :     return CstFP->isZero();
     921             :   } else {
     922             :     return false;
     923             :   }
     924             : }
     925             : 
     926       32686 : bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
     927             :   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
     928         470 :     return CFP->isExactlyValue(1.0);
     929             :   }
     930       32216 :   return isAllOnesConstant(Op);
     931             : }
     932             : 
     933        5887 : bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
     934             :   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
     935         310 :     return CFP->getValueAPF().isZero();
     936             :   }
     937        5732 :   return isNullConstant(Op);
     938             : }
     939             : 
     940       16360 : SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     941             :   SDLoc DL(Op);
     942       16360 :   EVT VT = Op.getValueType();
     943             : 
     944       16360 :   SDValue LHS = Op.getOperand(0);
     945       16360 :   SDValue RHS = Op.getOperand(1);
     946       16360 :   SDValue True = Op.getOperand(2);
     947       16360 :   SDValue False = Op.getOperand(3);
     948       16360 :   SDValue CC = Op.getOperand(4);
     949             :   SDValue Temp;
     950             : 
     951             :   if (VT == MVT::f32) {
     952             :     DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
     953         383 :     SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
     954         383 :     if (MinMax)
     955          21 :       return MinMax;
     956             :   }
     957             : 
     958             :   // LHS and RHS are guaranteed to be the same value type
     959       16339 :   EVT CompareVT = LHS.getValueType();
     960             : 
     961             :   // Check if we can lower this to a native operation.
     962             : 
     963             :   // Try to lower to a SET* instruction:
     964             :   //
     965             :   // SET* can match the following patterns:
     966             :   //
     967             :   // select_cc f32, f32, -1,  0, cc_supported
     968             :   // select_cc f32, f32, 1.0f, 0.0f, cc_supported
     969             :   // select_cc i32, i32, -1,  0, cc_supported
     970             :   //
     971             : 
     972             :   // Move hardware True/False values to the correct operand.
     973       16339 :   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
     974             :   ISD::CondCode InverseCC =
     975       16339 :      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
     976       16339 :   if (isHWTrueValue(False) && isHWFalseValue(True)) {
     977          82 :     if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
     978             :       std::swap(False, True);
     979          11 :       CC = DAG.getCondCode(InverseCC);
     980             :     } else {
     981          71 :       ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
     982          71 :       if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
     983             :         std::swap(False, True);
     984             :         std::swap(LHS, RHS);
     985          60 :         CC = DAG.getCondCode(SwapInvCC);
     986             :       }
     987             :     }
     988             :   }
     989             : 
     990       16339 :   if (isHWTrueValue(True) && isHWFalseValue(False) &&
     991             :       (CompareVT == VT || VT == MVT::i32)) {
     992             :     // This can be matched by a SET* instruction.
     993        5704 :     return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
     994             :   }
     995             : 
     996             :   // Try to lower to a CND* instruction:
     997             :   //
     998             :   // CND* can match the following patterns:
     999             :   //
    1000             :   // select_cc f32, 0.0, f32, f32, cc_supported
    1001             :   // select_cc f32, 0.0, i32, i32, cc_supported
    1002             :   // select_cc i32, 0,   f32, f32, cc_supported
    1003             :   // select_cc i32, 0,   i32, i32, cc_supported
    1004             :   //
    1005             : 
    1006             :   // Try to move the zero value to the RHS
    1007       10635 :   if (isZero(LHS)) {
    1008           3 :     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    1009             :     // Try swapping the operands
    1010           3 :     ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    1011           3 :     if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
    1012             :       std::swap(LHS, RHS);
    1013           0 :       CC = DAG.getCondCode(CCSwapped);
    1014             :     } else {
    1015             :       // Try inverting the condition and then swapping the operands
    1016           3 :       ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
    1017           3 :       CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
    1018           3 :       if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
    1019             :         std::swap(True, False);
    1020             :         std::swap(LHS, RHS);
    1021           0 :         CC = DAG.getCondCode(CCSwapped);
    1022             :       }
    1023             :     }
    1024             :   }
    1025       10635 :   if (isZero(RHS)) {
    1026        9757 :     SDValue Cond = LHS;
    1027        9757 :     SDValue Zero = RHS;
    1028        9757 :     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    1029        9757 :     if (CompareVT != VT) {
    1030             :       // Bitcast True / False to the correct types.  This will end up being
    1031             :       // a nop, but it allows us to define only a single pattern in the
    1032             :       // .TD files for each CND* instruction rather than having to have
    1033             :       // one pattern for integer True/False and one for fp True/False
    1034          55 :       True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
    1035          55 :       False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    1036             :     }
    1037             : 
    1038             :     switch (CCOpcode) {
    1039             :     case ISD::SETONE:
    1040             :     case ISD::SETUNE:
    1041             :     case ISD::SETNE:
    1042        2149 :       CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
    1043        2149 :       Temp = True;
    1044             :       True = False;
    1045             :       False = Temp;
    1046        2149 :       break;
    1047             :     default:
    1048             :       break;
    1049             :     }
    1050             :     SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
    1051             :         Cond, Zero,
    1052             :         True, False,
    1053        9757 :         DAG.getCondCode(CCOpcode));
    1054        9757 :     return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
    1055             :   }
    1056             : 
    1057             :   // If we make it this far, it means we have no native instructions to handle
    1058             :   // this SELECT_CC, so we must lower it.
    1059         878 :   SDValue HWTrue, HWFalse;
    1060             : 
    1061             :   if (CompareVT == MVT::f32) {
    1062          60 :     HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    1063          60 :     HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
    1064             :   } else if (CompareVT == MVT::i32) {
    1065         818 :     HWTrue = DAG.getConstant(-1, DL, CompareVT);
    1066         818 :     HWFalse = DAG.getConstant(0, DL, CompareVT);
    1067             :   }
    1068             :   else {
    1069           0 :     llvm_unreachable("Unhandled value type in LowerSELECT_CC");
    1070             :   }
    1071             : 
    1072             :   // Lower this unsupported SELECT_CC into a combination of two supported
    1073             :   // SELECT_CC operations.
    1074         878 :   SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
    1075             : 
    1076             :   return DAG.getNode(ISD::SELECT_CC, DL, VT,
    1077             :       Cond, HWFalse,
    1078             :       True, False,
    1079         878 :       DAG.getCondCode(ISD::SETNE));
    1080             : }
    1081             : 
    1082             : /// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
    1083             : /// convert these pointers to a register index.  Each register holds
    1084             : /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
    1085             : /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
    1086             : /// for indirect addressing.  Only stack widths of 1, 2 and 4 are accepted.
    1087           0 : SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
    1088             :                                                unsigned StackWidth,
    1089             :                                                SelectionDAG &DAG) const {
    1090             :   unsigned SRLPad; // Right-shift amount, i.e. log2 of the bytes covered by one index.
    1091           0 :   switch(StackWidth) {
    1092             :   case 1:
    1093             :     SRLPad = 2; // 1 x 32 bit = 4 bytes per index
    1094             :     break;
    1095           0 :   case 2:
    1096             :     SRLPad = 3; // 2 x 32 bit = 8 bytes per index
    1097           0 :     break;
    1098           0 :   case 4:
    1099             :     SRLPad = 4; // 4 x 32 bit = 16 bytes per index
    1100           0 :     break;
    1101           0 :   default: llvm_unreachable("Invalid stack width");
    1102             :   }
    1103             : 
    1104             :   SDLoc DL(Ptr);
    1105             :   return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, // Ptr >> SRLPad, in Ptr's own value type
    1106           0 :                      DAG.getConstant(SRLPad, DL, MVT::i32));
    1107             : }
    1108             : /// Writes the outputs \p Channel and \p PtrIncr for element \p ElemIdx of a stack access, depending on \p StackWidth.
    1109           0 : void R600TargetLowering::getStackAddress(unsigned StackWidth,
    1110             :                                          unsigned ElemIdx,
    1111             :                                          unsigned &Channel,
    1112             :                                          unsigned &PtrIncr) const {
    1113           0 :   switch (StackWidth) {
    1114           0 :   default: // Any width other than 2 or 4 is handled exactly like width 1.
    1115             :   case 1:
    1116           0 :     Channel = 0; // Width 1: always channel 0; every element after the first bumps the pointer by 1.
    1117           0 :     if (ElemIdx > 0) {
    1118           0 :       PtrIncr = 1;
    1119             :     } else {
    1120           0 :       PtrIncr = 0;
    1121             :     }
    1122             :     break;
    1123           0 :   case 2:
    1124           0 :     Channel = ElemIdx % 2; // Width 2: elements alternate between channels 0 and 1.
    1125           0 :     if (ElemIdx == 2) { // NOTE(review): ElemIdx == 3 falls into the else branch (PtrIncr = 0) -- confirm this is intended.
    1126           0 :       PtrIncr = 1;
    1127             :     } else {
    1128           0 :       PtrIncr = 0;
    1129             :     }
    1130             :     break;
    1131           0 :   case 4:
    1132           0 :     Channel = ElemIdx; // Width 4: the element index is used directly as the channel, no pointer bump.
    1133           0 :     PtrIncr = 0;
    1134           0 :     break;
    1135             :   }
    1136           0 : }
    1137             : /// Lowers an i8/i16 store to PRIVATE_ADDRESS as a read-modify-write of the containing dword: load it, clear the target bits, OR in the shifted value and store it back.  Returns the new dword store.
    1138        1313 : SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
    1139             :                                                    SelectionDAG &DAG) const {
    1140             :   SDLoc DL(Store);
    1141             :   //TODO: Who creates the i8 stores?
    1142             :   assert(Store->isTruncatingStore()
    1143             :          || Store->getValue().getValueType() == MVT::i8);
    1144             :   assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
    1145             : 
    1146        1313 :   SDValue Mask; // Unshifted bit mask covering the stored memory type (0xff or 0xffff).
    1147             :   if (Store->getMemoryVT() == MVT::i8) {
    1148             :     assert(Store->getAlignment() >= 1);
    1149         591 :     Mask = DAG.getConstant(0xff, DL, MVT::i32);
    1150             :   } else if (Store->getMemoryVT() == MVT::i16) {
    1151             :     assert(Store->getAlignment() >= 2); // A 2-byte aligned halfword cannot straddle two dwords.
    1152         722 :     Mask = DAG.getConstant(0xffff, DL, MVT::i32);
    1153             :   } else {
    1154           0 :     llvm_unreachable("Unsupported private trunc store");
    1155             :   }
    1156             : 
    1157        1313 :   SDValue OldChain = Store->getChain();
    1158        1313 :   bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); // True when the incoming chain is a DUMMY_CHAIN wrapper node.
    1159             :   // Skip dummy
    1160        1313 :   SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
    1161        1313 :   SDValue BasePtr = Store->getBasePtr();
    1162        1313 :   SDValue Offset = Store->getOffset();
    1163        1313 :   EVT MemVT = Store->getMemoryVT();
    1164             : 
    1165        1313 :   SDValue LoadPtr = BasePtr; // Byte address of the store: base pointer plus the offset, if one is defined.
    1166        1313 :   if (!Offset.isUndef()) {
    1167           0 :     LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
    1168             :   }
    1169             : 
    1170             :   // Get dword location
    1171             :   // TODO: this should be eliminated by the future SHR ptr, 2
    1172             :   SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
    1173        1313 :                             DAG.getConstant(0xfffffffc, DL, MVT::i32));
    1174             : 
    1175             :   // Load dword
    1176             :   // TODO: can we be smarter about machine pointer info?
    1177        1313 :   MachinePointerInfo PtrInfo(UndefValue::get(
    1178        1313 :       Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
    1179        1313 :   SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
    1180             : 
    1181        1313 :   Chain = Dst.getValue(1); // Continue on the load's output chain so the store below is ordered after it.
    1182             : 
    1183             :   // Get offset in dword
    1184             :   SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
    1185        1313 :                                 DAG.getConstant(0x3, DL, MVT::i32));
    1186             : 
    1187             :   // Convert byte offset to bit shift
    1188             :   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
    1189        1313 :                                  DAG.getConstant(3, DL, MVT::i32));
    1190             : 
    1191             :   // TODO: Contrary to the name of the function,
    1192             :   // it also handles sub i32 non-truncating stores (like i1)
    1193             :   SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
    1194        1313 :                                   Store->getValue());
    1195             : 
    1196             :   // Mask the value to the right type
    1197        1313 :   SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
    1198             : 
    1199             :   // Shift the value in place
    1200             :   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
    1201        1313 :                                      MaskedValue, ShiftAmt);
    1202             : 
    1203             :   // Shift the mask in place
    1204        1313 :   SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
    1205             : 
    1206             :   // Invert the mask. NOTE: if we had native ROL instructions we could
    1207             :   // use inverted mask
    1208        1313 :   DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
    1209             : 
    1210             :   // Cleanup the target bits
    1211        1313 :   Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
    1212             : 
    1213             :   // Add the new bits
    1214        1313 :   SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
    1215             : 
    1216             :   // Store dword
    1217             :   // TODO: Can we be smarter about MachinePointerInfo?
    1218        1313 :   SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
    1219             : 
    1220             :   // If we are part of expanded vector, make our neighbors depend on this store
    1221        1313 :   if (VectorTrunc) {
    1222             :     // Make all other vector elements depend on this store
    1223         868 :     Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    1224         868 :     DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); // Every remaining user of the old dummy chain now hangs off the new one.
    1225             :   }
    1226        1313 :   return NewStore;
    1227             : }
    1228             : /// Custom lowering for a store node.  Vector stores to LOCAL/PRIVATE are scalarized, under-aligned stores are expanded, truncating GLOBAL stores become STORE_MSKOR, sub-dword PRIVATE stores use lowerPrivateTruncStore, and i32+ GLOBAL/PRIVATE stores get a DWORDADDR-tagged pointer.  Otherwise an empty SDValue() is returned.
    1229       33975 : SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    1230             :   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
    1231             :   unsigned AS = StoreNode->getAddressSpace();
    1232             : 
    1233       33975 :   SDValue Chain = StoreNode->getChain();
    1234       33975 :   SDValue Ptr = StoreNode->getBasePtr();
    1235       33975 :   SDValue Value = StoreNode->getValue();
    1236             : 
    1237       33975 :   EVT VT = Value.getValueType();
    1238       33975 :   EVT MemVT = StoreNode->getMemoryVT();
    1239       33975 :   EVT PtrVT = Ptr.getValueType();
    1240             : 
    1241             :   SDLoc DL(Op);
    1242             : 
    1243             :   // Neither LOCAL nor PRIVATE can do vectors at the moment
    1244       58217 :   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
    1245             :       VT.isVector()) {
    1246         947 :     if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
    1247         304 :          StoreNode->isTruncatingStore()) {
    1248             :       // Add an extra level of chain to isolate this vector
    1249         243 :       SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
    1250             :       // TODO: can the chain be replaced without creating a new store?
    1251             :       SDValue NewStore = DAG.getTruncStore(
    1252         243 :           NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
    1253             :           MemVT, StoreNode->getAlignment(),
    1254         486 :           StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
    1255             :       StoreNode = cast<StoreSDNode>(NewStore); // Scalarize the re-chained store instead of the original node.
    1256             :     }
    1257             : 
    1258         947 :     return scalarizeVectorStore(StoreNode, DAG);
    1259             :   }
    1260             : 
    1261       33028 :   unsigned Align = StoreNode->getAlignment();
    1262       33267 :   if (Align < MemVT.getStoreSize() && // Under-aligned and the target does not allow it: expand.
    1263         239 :       !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    1264          24 :     return expandUnalignedStore(StoreNode, DAG);
    1265             :   }
    1266             : 
    1267             :   SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, // Byte address shifted right by 2, i.e. a dword index.
    1268       33004 :                                   DAG.getConstant(2, DL, PtrVT));
    1269             : 
    1270       33004 :   if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    1271             :     // It is beneficial to create MSKOR here instead of combiner to avoid
    1272             :     // artificial dependencies introduced by RMW
    1273        9727 :     if (StoreNode->isTruncatingStore()) {
    1274             :       assert(VT.bitsLE(MVT::i32));
    1275         228 :       SDValue MaskConstant; // Unshifted mask for the memory type: 0xFF for i8, 0xFFFF for i16.
    1276             :       if (MemVT == MVT::i8) {
    1277         121 :         MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
    1278             :       } else {
    1279             :         assert(MemVT == MVT::i16);
    1280             :         assert(StoreNode->getAlignment() >= 2);
    1281         107 :         MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
    1282             :       }
    1283             : 
    1284             :       SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr, // Byte position inside the dword (low two address bits).
    1285         228 :                                       DAG.getConstant(0x00000003, DL, PtrVT));
    1286             :       SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, // ByteIndex * 8 = bit position inside the dword.
    1287         228 :                                      DAG.getConstant(3, DL, VT));
    1288             : 
    1289             :       // Put the mask in correct place
    1290         228 :       SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
    1291             : 
    1292             :       // Put the value bits in correct place
    1293         228 :       SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
    1294         228 :       SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
    1295             : 
    1296             :       // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
    1297             :       // vector instead.
    1298             :       SDValue Src[4] = { // v4i32 operand: element 0 = shifted value, element 3 = shifted mask, elements 1 and 2 are zero.
    1299             :         ShiftedValue,
    1300         228 :         DAG.getConstant(0, DL, MVT::i32),
    1301         228 :         DAG.getConstant(0, DL, MVT::i32),
    1302             :         Mask
    1303         456 :       };
    1304         228 :       SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
    1305         228 :       SDValue Args[3] = { Chain, Input, DWordAddr };
    1306             :       return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
    1307             :                                      Op->getVTList(), Args, MemVT,
    1308         684 :                                      StoreNode->getMemOperand());
    1309        9499 :     } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
    1310             :       // Convert pointer from byte address to dword address.
    1311        2975 :       Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    1312             : 
    1313        2975 :       if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { // Truncating stores took the branch above, so only indexed stores can reach this.
    1314           0 :         llvm_unreachable("Truncated and indexed stores not supported yet");
    1315             :       } else {
    1316        2975 :         Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    1317             :       }
    1318        2975 :       return Chain;
    1319             :     }
    1320             :   }
    1321             : 
    1322             :   // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
    1323       29801 :   if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    1324       19144 :     return SDValue();
    1325             : 
    1326       10657 :   if (MemVT.bitsLT(MVT::i32)) // Sub-dword PRIVATE store: read-modify-write of the containing dword.
    1327        1313 :     return lowerPrivateTruncStore(StoreNode, DAG);
    1328             : 
    1329             :   // Standard i32+ store, tag it with DWORDADDR to note that the address
    1330             :   // has been shifted
    1331        9344 :   if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    1332        2776 :     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    1333        2776 :     return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    1334             :   }
    1335             : 
    1336             :   // Tagged i32+ stores will be matched by patterns
    1337        6568 :   return SDValue();
    1338             : }
    1339             : 
    1340             : // Returns 512 + (kc_bank << 12) for CONSTANT_BUFFER_<kc_bank> (0-15), or -1 for any other address space.
    1341             : static int
    1342             : ConstantAddressBlock(unsigned AddressSpace) {
    1343             :   switch (AddressSpace) {
    1344             :   case AMDGPUAS::CONSTANT_BUFFER_0:
    1345             :     return 512;
    1346             :   case AMDGPUAS::CONSTANT_BUFFER_1:
    1347             :     return 512 + 4096;
    1348             :   case AMDGPUAS::CONSTANT_BUFFER_2:
    1349             :     return 512 + 4096 * 2;
    1350             :   case AMDGPUAS::CONSTANT_BUFFER_3:
    1351             :     return 512 + 4096 * 3;
    1352             :   case AMDGPUAS::CONSTANT_BUFFER_4:
    1353             :     return 512 + 4096 * 4;
    1354             :   case AMDGPUAS::CONSTANT_BUFFER_5:
    1355             :     return 512 + 4096 * 5;
    1356             :   case AMDGPUAS::CONSTANT_BUFFER_6:
    1357             :     return 512 + 4096 * 6;
    1358             :   case AMDGPUAS::CONSTANT_BUFFER_7:
    1359             :     return 512 + 4096 * 7;
    1360             :   case AMDGPUAS::CONSTANT_BUFFER_8:
    1361             :     return 512 + 4096 * 8;
    1362             :   case AMDGPUAS::CONSTANT_BUFFER_9:
    1363             :     return 512 + 4096 * 9;
    1364             :   case AMDGPUAS::CONSTANT_BUFFER_10:
    1365             :     return 512 + 4096 * 10;
    1366             :   case AMDGPUAS::CONSTANT_BUFFER_11:
    1367             :     return 512 + 4096 * 11;
    1368             :   case AMDGPUAS::CONSTANT_BUFFER_12:
    1369             :     return 512 + 4096 * 12;
    1370             :   case AMDGPUAS::CONSTANT_BUFFER_13:
    1371             :     return 512 + 4096 * 13;
    1372             :   case AMDGPUAS::CONSTANT_BUFFER_14:
    1373             :     return 512 + 4096 * 14;
    1374             :   case AMDGPUAS::CONSTANT_BUFFER_15:
    1375             :     return 512 + 4096 * 15;
    1376             :   default: // Not one of the sixteen constant-buffer address spaces.
    1377             :     return -1;
    1378             :   }
    1379             : }
    1380             : /// Lowers an extending sub-dword load from private memory: loads the containing dword, shifts the addressed bytes down to bit 0 and sign- or zero-extends them in register.  Returns the merged {value, load chain}.
    1381        4054 : SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
    1382             :                                                 SelectionDAG &DAG) const {
    1383             :   SDLoc DL(Op);
    1384             :   LoadSDNode *Load = cast<LoadSDNode>(Op);
    1385             :   ISD::LoadExtType ExtType = Load->getExtensionType();
    1386        4054 :   EVT MemVT = Load->getMemoryVT();
    1387             :   assert(Load->getAlignment() >= MemVT.getStoreSize());
    1388             : 
    1389        4054 :   SDValue BasePtr = Load->getBasePtr();
    1390        4054 :   SDValue Chain = Load->getChain();
    1391        4054 :   SDValue Offset = Load->getOffset();
    1392             : 
    1393        4054 :   SDValue LoadPtr = BasePtr; // Byte address of the load: base pointer plus the offset, if one is defined.
    1394        4054 :   if (!Offset.isUndef()) {
    1395           0 :     LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
    1396             :   }
    1397             : 
    1398             :   // Get dword location
    1399             :   // NOTE: this should be eliminated by the future SHR ptr, 2
    1400             :   SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
    1401        4054 :                             DAG.getConstant(0xfffffffc, DL, MVT::i32));
    1402             : 
    1403             :   // Load dword
    1404             :   // TODO: can we be smarter about machine pointer info?
    1405        4054 :   MachinePointerInfo PtrInfo(UndefValue::get(
    1406        4054 :       Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
    1407        4054 :   SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
    1408             : 
    1409             :   // Get offset within the register.
    1410             :   SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
    1411        4054 :                                 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
    1412             : 
    1413             :   // Bit offset of target byte (byteIdx * 8).
    1414             :   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
    1415        4054 :                                  DAG.getConstant(3, DL, MVT::i32));
    1416             : 
    1417             :   // Shift to the right.
    1418        4054 :   SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
    1419             : 
    1420             :   // Eliminate the upper bits by setting them to ...
    1421        4054 :   EVT MemEltVT = MemVT.getScalarType();
    1422             : 
    1423        4054 :   if (ExtType == ISD::SEXTLOAD) { // ... copies of the sign bit.
    1424        1280 :     SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    1425        1280 :     Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
    1426             :   } else { // ... or zeros (taken for every extension type other than SEXTLOAD).
    1427        2774 :     Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
    1428             :   }
    1429             : 
    1430             :   SDValue Ops[] = {
    1431             :     Ret,
    1432        4054 :     Read.getValue(1) // This should be our output chain
    1433        4054 :   };
    1434             : 
    1435        4054 :   return DAG.getMergeValues(Ops, DL);
    1436             : }
    1437             : /// Custom lowering for a load node.  Extending sub-dword PRIVATE loads use lowerPrivateExtLoad, vector LOCAL/PRIVATE loads are scalarized, non-extending/zero-extending constant-buffer loads are handled specially, SEXT loads are expanded to EXTLOAD + SIGN_EXTEND_INREG, and remaining PRIVATE loads get a DWORDADDR-tagged pointer.  Otherwise an empty SDValue() is returned.
    1438       35619 : SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    1439             :   LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
    1440             :   unsigned AS = LoadNode->getAddressSpace();
    1441       35619 :   EVT MemVT = LoadNode->getMemoryVT();
    1442             :   ISD::LoadExtType ExtType = LoadNode->getExtensionType();
    1443             : 
    1444       71238 :   if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
    1445       35619 :       ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    1446        4054 :     return lowerPrivateExtLoad(Op, DAG);
    1447             :   }
    1448             : 
    1449             :   SDLoc DL(Op);
    1450       31565 :   EVT VT = Op.getValueType();
    1451       31565 :   SDValue Chain = LoadNode->getChain();
    1452       31565 :   SDValue Ptr = LoadNode->getBasePtr();
    1453             : 
    1454       26909 :   if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || // Vector loads from LOCAL/PRIVATE are split into scalar loads.
    1455       57772 :       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
    1456             :       VT.isVector()) {
    1457         382 :       return scalarizeVectorLoad(LoadNode, DAG);
    1458             :   }
    1459             : 
    1460             :   // This is still used for explicit load from addrspace(8)
    1461             :   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); // -1 unless the address space is a constant buffer.
    1462         276 :   if (ConstantBlock > -1 &&
    1463           0 :       ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
    1464             :        (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    1465             :     SDValue Result;
    1466         552 :     if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
    1467             :         isa<ConstantSDNode>(Ptr)) {
    1468         276 :       return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    1469             :     } else {
    1470             :       //TODO: Does this even work?
    1471             :       // non-constant ptr can't be folded, keep it as a v4i32 CONST_ADDRESS node
    1472           0 :       Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
    1473             :           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, // Ptr >> 4
    1474             :                       DAG.getConstant(4, DL, MVT::i32)),
    1475           0 :                       DAG.getConstant(LoadNode->getAddressSpace() - // Constant buffer number relative to CONSTANT_BUFFER_0.
    1476             :                                       AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
    1477           0 :           );
    1478             :     }
    1479             : 
    1480           0 :     if (!VT.isVector()) { // Scalar result: take element 0 of the v4i32 value.
    1481           0 :       Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
    1482           0 :                            DAG.getConstant(0, DL, MVT::i32));
    1483             :     }
    1484             : 
    1485             :     SDValue MergedValues[2] = {
    1486             :       Result,
    1487             :       Chain
    1488           0 :     };
    1489           0 :     return DAG.getMergeValues(MergedValues, DL);
    1490             :   }
    1491             : 
    1492             :   // For most operations returning SDValue() will result in the node being
    1493             :   // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
    1494             :   // need to manually expand loads that may be legal in some address spaces and
    1495             :   // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
    1496             :   // compute shaders, since the data is sign extended when it is uploaded to the
    1497             :   // buffer. However SEXT loads from other address spaces are not supported, so
    1498             :   // we need to expand them here.
    1499       30907 :   if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    1500         380 :     EVT MemVT = LoadNode->getMemoryVT(); // Shadows the MemVT declared at the top of the function (same value).
    1501             :     assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    1502             :     SDValue NewLoad = DAG.getExtLoad(
    1503         380 :         ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
    1504         380 :         LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    1505             :     SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
    1506         380 :                               DAG.getValueType(MemVT));
    1507             : 
    1508         380 :     SDValue MergedValues[2] = { Res, Chain }; // NOTE(review): merges the incoming Chain, not NewLoad.getValue(1) -- confirm this is intended.
    1509         380 :     return DAG.getMergeValues(MergedValues, DL);
    1510             :   }
    1511             : 
    1512       30527 :   if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    1513        9161 :     return SDValue();
    1514             :   }
    1515             : 
    1516             :   // DWORDADDR ISD marks already shifted address
    1517       21366 :   if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    1518             :     assert(VT == MVT::i32);
    1519        5898 :     Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    1520        5898 :     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    1521       11796 :     return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
    1522             :   }
    1523       15468 :   return SDValue(); // Pointer is already DWORDADDR-tagged: nothing to rewrite.
    1524             : }
    1525             : /// Rebuilds the node as AMDGPUISD::BRANCH_COND, reordering the operands from (Chain, Cond, Jump) to (Chain, Jump, Cond).
    1526          86 : SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
    1527          86 :   SDValue Chain = Op.getOperand(0);
    1528          86 :   SDValue Cond  = Op.getOperand(1);
    1529          86 :   SDValue Jump  = Op.getOperand(2);
    1530             : 
    1531          86 :   return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
    1532          86 :                      Chain, Jump, Cond);
    1533             : }
    1534             : /// Replaces a frame index node by a constant: the frame-index reference offset multiplied by 4 and by the function's stack width.
    1535        1606 : SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
    1536             :                                             SelectionDAG &DAG) const {
    1537        1606 :   MachineFunction &MF = DAG.getMachineFunction();
    1538        1606 :   const R600FrameLowering *TFL = Subtarget->getFrameLowering();
    1539             : 
    1540             :   FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
    1541             : 
    1542        1606 :   unsigned FrameIndex = FIN->getIndex();
    1543             :   unsigned IgnoredFrameReg; // Out-parameter of getFrameIndexReference; its value is not used here.
    1544             :   unsigned Offset =
    1545        1606 :     TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
    1546        1606 :   return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
    1547        1606 :                          Op.getValueType());
    1548             : }
    1549             : /// Returns CC_R600 for the AMDGPU_* shader calling conventions listed below.  Kernel, C, Fast and Cold conventions are marked unreachable, and any other convention is a fatal error.  \p IsVarArg is not used.
    1550          50 : CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
    1551             :                                                   bool IsVarArg) const {
    1552             :   switch (CC) {
    1553             :   case CallingConv::AMDGPU_KERNEL:
    1554             :   case CallingConv::SPIR_KERNEL:
    1555             :   case CallingConv::C:
    1556             :   case CallingConv::Fast:
    1557             :   case CallingConv::Cold:
    1558             :     llvm_unreachable("kernels should not be handled here");
    1559          50 :   case CallingConv::AMDGPU_VS:
    1560             :   case CallingConv::AMDGPU_GS:
    1561             :   case CallingConv::AMDGPU_PS:
    1562             :   case CallingConv::AMDGPU_CS:
    1563             :   case CallingConv::AMDGPU_HS:
    1564             :   case CallingConv::AMDGPU_ES:
    1565             :   case CallingConv::AMDGPU_LS:
    1566          50 :     return CC_R600;
    1567           0 :   default:
    1568           0 :     report_fatal_error("Unsupported calling convention.");
    1569             :   }
    1570             : }
    1571             : 
    1572             : /// XXX Only kernel functions are supported, so we can assume for now that
    1573             : /// every function is a kernel function, but in the future we should use
    1574             : /// separate calling conventions for kernel and non-kernel functions.
    1575        2298 : SDValue R600TargetLowering::LowerFormalArguments(
    1576             :     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    1577             :     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    1578             :     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
    1579             :   SmallVector<CCValAssign, 16> ArgLocs;
    1580             :   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
    1581        4596 :                  *DAG.getContext());
    1582        2298 :   MachineFunction &MF = DAG.getMachineFunction();
    1583             :   SmallVector<ISD::InputArg, 8> LocalIns;
    1584             : 
    1585        2298 :   if (AMDGPU::isShader(CallConv)) {
    1586          50 :     CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
    1587             :   } else {
    1588        2248 :     analyzeFormalArgumentsCompute(CCInfo, Ins);
    1589             :   }
    1590             : 
    1591        8584 :   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    1592        6286 :     CCValAssign &VA = ArgLocs[i];
    1593             :     const ISD::InputArg &In = Ins[i];
    1594             :     EVT VT = In.VT;
    1595             :     EVT MemVT = VA.getLocVT();
    1596       12190 :     if (!VT.isVector() && MemVT.isVector()) {
    1597             :       // Get load source type if scalarized.
    1598           0 :       MemVT = MemVT.getVectorElementType();
    1599             :     }
    1600             : 
    1601        6286 :     if (AMDGPU::isShader(CallConv)) {
    1602          65 :       unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
    1603          65 :       SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
    1604          65 :       InVals.push_back(Register);
    1605             :       continue;
    1606             :     }
    1607             : 
    1608        6221 :     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
    1609             :                                           AMDGPUAS::PARAM_I_ADDRESS);
    1610             : 
    1611             :     // i64 isn't a legal type, so the register type used ends up as i32, which
    1612             :     // isn't expected here. It attempts to create this sextload, but it ends up
    1613             :     // being invalid. Somehow this seems to work with i64 arguments, but breaks
    1614             :     // for <1 x i64>.
    1615             : 
    1616             :     // The first 36 bytes of the input buffer contains information about
    1617             :     // thread group and global sizes.
    1618             :     ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    1619        6221 :     if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
    1620             :       // FIXME: This should really check the extload type, but the handling of
    1621             :       // extload vector parameters seems to be broken.
    1622             : 
    1623             :       // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    1624             :       Ext = ISD::SEXTLOAD;
    1625             :     }
    1626             : 
    1627             :     // Compute the offset from the value.
    1628             :     // XXX - I think PartOffset should give you this, but it seems to give the
    1629             :     // size of the register which isn't useful.
    1630             : 
    1631        6221 :     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    1632        6221 :     unsigned PartOffset = VA.getLocMemOffset();
    1633        6221 :     unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
    1634             : 
    1635        6221 :     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    1636             :     SDValue Arg = DAG.getLoad(
    1637             :         ISD::UNINDEXED, Ext, VT, DL, Chain,
    1638             :         DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
    1639             :         PtrInfo,
    1640             :         MemVT, Alignment, MachineMemOperand::MONonTemporal |
    1641             :                                         MachineMemOperand::MODereferenceable |
    1642        6221 :                                         MachineMemOperand::MOInvariant);
    1643             : 
    1644        6221 :     InVals.push_back(Arg);
    1645             :   }
    1646        2298 :   return Chain;
    1647             : }
    1648             : 
    1649       35340 : EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
    1650             :                                            EVT VT) const {
    1651       35340 :    if (!VT.isVector())
    1652       35253 :      return MVT::i32;
    1653          87 :    return VT.changeVectorElementTypeToInteger();
    1654             : }
    1655             : 
    1656         117 : bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
    1657             :                                           const SelectionDAG &DAG) const {
    1658             :   // Local and Private addresses do not handle vectors. Limit to i32
    1659         117 :   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    1660         116 :     return (MemVT.getSizeInBits() <= 32);
    1661             :   }
    1662             :   return true;
    1663             : }
    1664             : 
    1665         829 : bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
    1666             :                                                         unsigned AddrSpace,
    1667             :                                                         unsigned Align,
    1668             :                                                         bool *IsFast) const {
    1669         829 :   if (IsFast)
    1670         590 :     *IsFast = false;
    1671             : 
    1672         829 :   if (!VT.isSimple() || VT == MVT::Other)
    1673           2 :     return false;
    1674             : 
    1675         827 :   if (VT.bitsLT(MVT::i32))
    1676             :     return false;
    1677             : 
    1678             :   // TODO: This is a rough estimate.
    1679         787 :   if (IsFast)
    1680         572 :     *IsFast = true;
    1681             : 
    1682         849 :   return VT.bitsGT(MVT::i32) && Align % 4 == 0;
    1683             : }
    1684             : 
    1685           0 : static SDValue CompactSwizzlableVector(
    1686             :   SelectionDAG &DAG, SDValue VectorEntry,
    1687             :   DenseMap<unsigned, unsigned> &RemapSwizzle) {
    1688             :   assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
    1689             :   assert(RemapSwizzle.empty());
    1690             :   SDValue NewBldVec[4] = {
    1691             :     VectorEntry.getOperand(0),
    1692             :     VectorEntry.getOperand(1),
    1693             :     VectorEntry.getOperand(2),
    1694             :     VectorEntry.getOperand(3)
    1695           0 :   };
    1696             : 
    1697           0 :   for (unsigned i = 0; i < 4; i++) {
    1698           0 :     if (NewBldVec[i].isUndef())
    1699             :       // We mask write here to teach later passes that the ith element of this
    1700             :       // vector is undef. Thus we can use it to reduce 128 bits reg usage,
    1701             :       // break false dependencies and additionnaly make assembly easier to read.
    1702           0 :       RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    1703           0 :     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
    1704           0 :       if (C->isZero()) {
    1705           0 :         RemapSwizzle[i] = 4; // SEL_0
    1706           0 :         NewBldVec[i] = DAG.getUNDEF(MVT::f32);
    1707           0 :       } else if (C->isExactlyValue(1.0)) {
    1708           0 :         RemapSwizzle[i] = 5; // SEL_1
    1709           0 :         NewBldVec[i] = DAG.getUNDEF(MVT::f32);
    1710             :       }
    1711             :     }
    1712             : 
    1713           0 :     if (NewBldVec[i].isUndef())
    1714           0 :       continue;
    1715           0 :     for (unsigned j = 0; j < i; j++) {
    1716           0 :       if (NewBldVec[i] == NewBldVec[j]) {
    1717           0 :         NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
    1718           0 :         RemapSwizzle[i] = j;
    1719           0 :         break;
    1720             :       }
    1721             :     }
    1722             :   }
    1723             : 
    1724           0 :   return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
    1725           0 :                             NewBldVec);
    1726             : }
    1727             : 
    1728           0 : static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
    1729             :                                 DenseMap<unsigned, unsigned> &RemapSwizzle) {
    1730             :   assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
    1731             :   assert(RemapSwizzle.empty());
    1732             :   SDValue NewBldVec[4] = {
    1733             :       VectorEntry.getOperand(0),
    1734             :       VectorEntry.getOperand(1),
    1735             :       VectorEntry.getOperand(2),
    1736             :       VectorEntry.getOperand(3)
    1737           0 :   };
    1738           0 :   bool isUnmovable[4] = { false, false, false, false };
    1739           0 :   for (unsigned i = 0; i < 4; i++) {
    1740           0 :     RemapSwizzle[i] = i;
    1741           0 :     if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    1742             :       unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
    1743           0 :           ->getZExtValue();
    1744           0 :       if (i == Idx)
    1745           0 :         isUnmovable[Idx] = true;
    1746             :     }
    1747             :   }
    1748             : 
    1749           0 :   for (unsigned i = 0; i < 4; i++) {
    1750           0 :     if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    1751             :       unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
    1752           0 :           ->getZExtValue();
    1753           0 :       if (isUnmovable[Idx])
    1754           0 :         continue;
    1755             :       // Swap i and Idx
    1756           0 :       std::swap(NewBldVec[Idx], NewBldVec[i]);
    1757             :       std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
    1758           0 :       break;
    1759             :     }
    1760             :   }
    1761             : 
    1762           0 :   return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
    1763           0 :                             NewBldVec);
    1764             : }
    1765             : 
    1766         392 : SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
    1767             :                                             SelectionDAG &DAG,
    1768             :                                             const SDLoc &DL) const {
    1769             :   assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
    1770             :   // Old -> New swizzle values
    1771             :   DenseMap<unsigned, unsigned> SwizzleRemap;
    1772             : 
    1773         392 :   BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
    1774        1960 :   for (unsigned i = 0; i < 4; i++) {
    1775        4704 :     unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    1776        1568 :     if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
    1777         119 :       Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
    1778             :   }
    1779             : 
    1780         392 :   SwizzleRemap.clear();
    1781         392 :   BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
    1782        1960 :   for (unsigned i = 0; i < 4; i++) {
    1783        4704 :     unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    1784        1568 :     if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
    1785        1350 :       Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
    1786             :   }
    1787             : 
    1788         392 :   return BuildVector;
    1789             : }
    1790             : 
    1791        7164 : SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
    1792             :                                             SelectionDAG &DAG) const {
    1793             :   SDLoc DL(LoadNode);
    1794       14328 :   EVT VT = LoadNode->getValueType(0);
    1795        7164 :   SDValue Chain = LoadNode->getChain();
    1796        7164 :   SDValue Ptr = LoadNode->getBasePtr();
    1797             :   assert (isa<ConstantSDNode>(Ptr));
    1798             : 
    1799             :   //TODO: Support smaller loads
    1800        7164 :   if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
    1801        1908 :     return SDValue();
    1802             : 
    1803        5256 :   if (LoadNode->getAlignment() < 4)
    1804          18 :     return SDValue();
    1805             : 
    1806        5238 :   int ConstantBlock = ConstantAddressBlock(Block);
    1807             : 
    1808        5238 :   SDValue Slots[4];
    1809       26190 :   for (unsigned i = 0; i < 4; i++) {
    1810             :     // We want Const position encoded with the following formula :
    1811             :     // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    1812             :     // const_index is Ptr computed by llvm using an alignment of 16.
    1813             :     // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    1814             :     // then div by 4 at the ISel step
    1815             :     SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
    1816       20952 :         DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    1817       20952 :     Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
    1818             :   }
    1819        5238 :   EVT NewVT = MVT::v4i32;
    1820             :   unsigned NumElements = 4;
    1821        5238 :   if (VT.isVector()) {
    1822         559 :     NewVT = VT;
    1823             :     NumElements = VT.getVectorNumElements();
    1824             :   }
    1825        5238 :   SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    1826        5238 :   if (!VT.isVector()) {
    1827        4679 :     Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
    1828        4679 :                          DAG.getConstant(0, DL, MVT::i32));
    1829             :   }
    1830             :   SDValue MergedValues[2] = {
    1831             :     Result,
    1832             :     Chain
    1833        5238 :   };
    1834        5238 :   return DAG.getMergeValues(MergedValues, DL);
    1835             : }
    1836             : 
    1837             : //===----------------------------------------------------------------------===//
    1838             : // Custom DAG Optimizations
    1839             : //===----------------------------------------------------------------------===//
    1840             : 
    1841      213667 : SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
    1842             :                                               DAGCombinerInfo &DCI) const {
    1843      213667 :   SelectionDAG &DAG = DCI.DAG;
    1844             :   SDLoc DL(N);
    1845             : 
    1846      427334 :   switch (N->getOpcode()) {
    1847             :   // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
    1848           2 :   case ISD::FP_ROUND: {
    1849           2 :       SDValue Arg = N->getOperand(0);
    1850           2 :       if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
    1851             :         return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
    1852           2 :                            Arg.getOperand(0));
    1853             :       }
    1854             :       break;
    1855             :     }
    1856             : 
    1857             :   // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
    1858             :   // (i32 select_cc f32, f32, -1, 0 cc)
    1859             :   //
    1860             :   // Mesa's GLSL frontend generates the above pattern a lot and we can lower
    1861             :   // this to one of the SET*_DX10 instructions.
    1862          58 :   case ISD::FP_TO_SINT: {
    1863          58 :     SDValue FNeg = N->getOperand(0);
    1864          58 :     if (FNeg.getOpcode() != ISD::FNEG) {
    1865          42 :       return SDValue();
    1866             :     }
    1867          16 :     SDValue SelectCC = FNeg.getOperand(0);
    1868             :     if (SelectCC.getOpcode() != ISD::SELECT_CC ||
    1869           8 :         SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
    1870          16 :         SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
    1871          32 :         !isHWTrueValue(SelectCC.getOperand(2)) ||
    1872           8 :         !isHWFalseValue(SelectCC.getOperand(3))) {
    1873           8 :       return SDValue();
    1874             :     }
    1875             : 
    1876             :     return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
    1877             :                            SelectCC.getOperand(0), // LHS
    1878             :                            SelectCC.getOperand(1), // RHS
    1879             :                            DAG.getConstant(-1, DL, MVT::i32), // True
    1880             :                            DAG.getConstant(0, DL, MVT::i32),  // False
    1881           8 :                            SelectCC.getOperand(4)); // CC
    1882             : 
    1883             :     break;
    1884             :   }
    1885             : 
    1886             :   // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
    1887             :   // => build_vector elt0, ... , NewEltIdx, ... , eltN
    1888         278 :   case ISD::INSERT_VECTOR_ELT: {
    1889         278 :     SDValue InVec = N->getOperand(0);
    1890         278 :     SDValue InVal = N->getOperand(1);
    1891         278 :     SDValue EltNo = N->getOperand(2);
    1892             : 
    1893             :     // If the inserted element is an UNDEF, just use the input vector.
    1894         278 :     if (InVal.isUndef())
    1895           0 :       return InVec;
    1896             : 
    1897         278 :     EVT VT = InVec.getValueType();
    1898             : 
    1899             :     // If we can't generate a legal BUILD_VECTOR, exit
    1900             :     if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
    1901           8 :       return SDValue();
    1902             : 
    1903             :     // Check that we know which element is being inserted
    1904             :     if (!isa<ConstantSDNode>(EltNo))
    1905           4 :       return SDValue();
    1906         266 :     unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    1907             : 
    1908             :     // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    1909             :     // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
    1910             :     // vector elements.
    1911             :     SmallVector<SDValue, 8> Ops;
    1912         266 :     if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
    1913           8 :       Ops.append(InVec.getNode()->op_begin(),
    1914             :                  InVec.getNode()->op_end());
    1915         262 :     } else if (InVec.isUndef()) {
    1916             :       unsigned NElts = VT.getVectorNumElements();
    1917           0 :       Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    1918             :     } else {
    1919         262 :       return SDValue();
    1920             :     }
    1921             : 
    1922             :     // Insert the element
    1923           8 :     if (Elt < Ops.size()) {
    1924             :       // All the operands of BUILD_VECTOR must have the same type;
    1925             :       // we enforce that here.
    1926           8 :       EVT OpVT = Ops[0].getValueType();
    1927           0 :       if (InVal.getValueType() != OpVT)
    1928           0 :         InVal = OpVT.bitsGT(InVal.getValueType()) ?
    1929           0 :           DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
    1930           0 :           DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
    1931           4 :       Ops[Elt] = InVal;
    1932             :     }
    1933             : 
    1934             :     // Return the new vector
    1935           4 :     return DAG.getBuildVector(VT, DL, Ops);
    1936             :   }
    1937             : 
    1938             :   // Extract_vec (Build_vector) generated by custom lowering
    1939             :   // also needs to be customly combined
    1940       10694 :   case ISD::EXTRACT_VECTOR_ELT: {
    1941       10694 :     SDValue Arg = N->getOperand(0);
    1942       10694 :     if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
    1943             :       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    1944           2 :         unsigned Element = Const->getZExtValue();
    1945           4 :         return Arg->getOperand(Element);
    1946             :       }
    1947             :     }
    1948         201 :     if (Arg.getOpcode() == ISD::BITCAST &&
    1949       10705 :         Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
    1950          26 :         (Arg.getOperand(0).getValueType().getVectorNumElements() ==
    1951       10705 :          Arg.getValueType().getVectorNumElements())) {
    1952             :       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    1953          13 :         unsigned Element = Const->getZExtValue();
    1954             :         return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
    1955          26 :                            Arg->getOperand(0).getOperand(Element));
    1956             :       }
    1957             :     }
    1958             :     break;
    1959             :   }
    1960             : 
    1961       11395 :   case ISD::SELECT_CC: {
    1962             :     // Try common optimizations
    1963       11395 :     if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
    1964           0 :       return Ret;
    1965             : 
    1966             :     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    1967             :     //      selectcc x, y, a, b, inv(cc)
    1968             :     //
    1969             :     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    1970             :     //      selectcc x, y, a, b, cc
    1971       11395 :     SDValue LHS = N->getOperand(0);
    1972       11395 :     if (LHS.getOpcode() != ISD::SELECT_CC) {
    1973        5656 :       return SDValue();
    1974             :     }
    1975             : 
    1976        5739 :     SDValue RHS = N->getOperand(1);
    1977        5739 :     SDValue True = N->getOperand(2);
    1978        5739 :     SDValue False = N->getOperand(3);
    1979        5739 :     ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
    1980             : 
    1981        5765 :     if (LHS.getOperand(2).getNode() != True.getNode() ||
    1982        5739 :         LHS.getOperand(3).getNode() != False.getNode() ||
    1983             :         RHS.getNode() != False.getNode()) {
    1984        5735 :       return SDValue();
    1985             :     }
    1986             : 
    1987             :     switch (NCC) {
    1988           0 :     default: return SDValue();
    1989           1 :     case ISD::SETNE: return LHS;
    1990           3 :     case ISD::SETEQ: {
    1991           3 :       ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
    1992           3 :       LHSCC = ISD::getSetCCInverse(LHSCC,
    1993           6 :                                   LHS.getOperand(0).getValueType().isInteger());
    1994           6 :       if (DCI.isBeforeLegalizeOps() ||
    1995             :           isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
    1996             :         return DAG.getSelectCC(DL,
    1997             :                                LHS.getOperand(0),
    1998             :                                LHS.getOperand(1),
    1999             :                                LHS.getOperand(2),
    2000             :                                LHS.getOperand(3),
    2001           0 :                                LHSCC);
    2002             :       break;
    2003             :     }
    2004             :     }
    2005           3 :     return SDValue();
    2006             :   }
    2007             : 
    2008         138 :   case AMDGPUISD::R600_EXPORT: {
    2009         138 :     SDValue Arg = N->getOperand(1);
    2010         138 :     if (Arg.getOpcode() != ISD::BUILD_VECTOR)
    2011             :       break;
    2012             : 
    2013             :     SDValue NewArgs[8] = {
    2014             :       N->getOperand(0), // Chain
    2015             :       SDValue(),
    2016             :       N->getOperand(2), // ArrayBase
    2017             :       N->getOperand(3), // Type
    2018             :       N->getOperand(4), // SWZ_X
    2019             :       N->getOperand(5), // SWZ_Y
    2020             :       N->getOperand(6), // SWZ_Z
    2021             :       N->getOperand(7) // SWZ_W
    2022         120 :     };
    2023         120 :     NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    2024         240 :     return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
    2025             :   }
    2026         296 :   case AMDGPUISD::TEXTURE_FETCH: {
    2027         296 :     SDValue Arg = N->getOperand(1);
    2028         296 :     if (Arg.getOpcode() != ISD::BUILD_VECTOR)
    2029             :       break;
    2030             : 
    2031             :     SDValue NewArgs[19] = {
    2032             :       N->getOperand(0),
    2033             :       N->getOperand(1),
    2034             :       N->getOperand(2),
    2035             :       N->getOperand(3),
    2036             :       N->getOperand(4),
    2037             :       N->getOperand(5),
    2038             :       N->getOperand(6),
    2039             :       N->getOperand(7),
    2040             :       N->getOperand(8),
    2041             :       N->getOperand(9),
    2042             :       N->getOperand(10),
    2043             :       N->getOperand(11),
    2044             :       N->getOperand(12),
    2045             :       N->getOperand(13),
    2046             :       N->getOperand(14),
    2047             :       N->getOperand(15),
    2048             :       N->getOperand(16),
    2049             :       N->getOperand(17),
    2050             :       N->getOperand(18),
    2051         272 :     };
    2052         272 :     NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    2053         544 :     return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
    2054             :   }
    2055             : 
    2056             :   case ISD::LOAD: {
    2057             :     LoadSDNode *LoadNode = cast<LoadSDNode>(N);
    2058       41071 :     SDValue Ptr = LoadNode->getBasePtr();
    2059       41071 :     if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
    2060             :          isa<ConstantSDNode>(Ptr))
    2061        6888 :       return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
    2062             :     break;
    2063             :   }
    2064             : 
    2065             :   default: break;
    2066             :   }
    2067             : 
    2068      194640 :   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    2069             : }
    2070             : 
    2071      243106 : bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
    2072             :                                      SDValue &Src, SDValue &Neg, SDValue &Abs,
    2073             :                                      SDValue &Sel, SDValue &Imm,
    2074             :                                      SelectionDAG &DAG) const {
    2075      243106 :   const R600InstrInfo *TII = Subtarget->getInstrInfo();
    2076      486212 :   if (!Src.isMachineOpcode())
    2077             :     return false;
    2078             : 
    2079      155797 :   switch (Src.getMachineOpcode()) {
    2080         125 :   case R600::FNEG_R600:
    2081         125 :     if (!Neg.getNode())
    2082             :       return false;
    2083         101 :     Src = Src.getOperand(0);
    2084         101 :     Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    2085         101 :     return true;
    2086         109 :   case R600::FABS_R600:
    2087         109 :     if (!Abs.getNode())
    2088             :       return false;
    2089          93 :     Src = Src.getOperand(0);
    2090          93 :     Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    2091          93 :     return true;
    2092       10803 :   case R600::CONST_COPY: {
    2093       10803 :     unsigned Opcode = ParentNode->getMachineOpcode();
    2094       10803 :     bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    2095             : 
    2096       10803 :     if (!Sel.getNode())
    2097             :       return false;
    2098             : 
    2099        9456 :     SDValue CstOffset = Src.getOperand(0);
    2100       28368 :     if (ParentNode->getValueType(0).isVector())
    2101             :       return false;
    2102             : 
    2103             :     // Gather constants values
    2104             :     int SrcIndices[] = {
    2105        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src0),
    2106        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src1),
    2107        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src2),
    2108        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src0_X),
    2109        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
    2110        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
    2111        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src0_W),
    2112        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src1_X),
    2113        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
    2114        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
    2115        9456 :       TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    2116        9456 :     };
    2117             :     std::vector<unsigned> Consts;
    2118      113472 :     for (int OtherSrcIdx : SrcIndices) {
    2119      104016 :       int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
    2120      104016 :       if (OtherSrcIdx < 0 || OtherSelIdx < 0)
    2121             :         continue;
    2122       19164 :       if (HasDst) {
    2123       19164 :         OtherSrcIdx--;
    2124       19164 :         OtherSelIdx--;
    2125             :       }
    2126             :       if (RegisterSDNode *Reg =
    2127       19164 :           dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
    2128         766 :         if (Reg->getReg() == R600::ALU_CONST) {
    2129             :           ConstantSDNode *Cst
    2130         596 :             = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
    2131        1192 :           Consts.push_back(Cst->getZExtValue());
    2132             :         }
    2133             :       }
    2134             :     }
    2135             : 
    2136             :     ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    2137       18912 :     Consts.push_back(Cst->getZExtValue());
    2138        9456 :     if (!TII->fitsConstReadLimitations(Consts)) {
    2139             :       return false;
    2140             :     }
    2141             : 
    2142        9436 :     Sel = CstOffset;
    2143        9436 :     Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
    2144        9436 :     return true;
    2145             :   }
    2146             :   case R600::MOV_IMM_GLOBAL_ADDR:
    2147             :     // Check if the Imm slot is used. Taken from below.
    2148          28 :     if (cast<ConstantSDNode>(Imm)->getZExtValue())
    2149             :       return false;
    2150          14 :     Imm = Src.getOperand(0);
    2151          14 :     Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
    2152          14 :     return true;
    2153       27717 :   case R600::MOV_IMM_I32:
    2154             :   case R600::MOV_IMM_F32: {
    2155             :     unsigned ImmReg = R600::ALU_LITERAL_X;
    2156             :     uint64_t ImmValue = 0;
    2157             : 
    2158       27717 :     if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
    2159             :       ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
    2160        1072 :       float FloatValue = FPC->getValueAPF().convertToFloat();
    2161         536 :       if (FloatValue == 0.0) {
    2162             :         ImmReg = R600::ZERO;
    2163         394 :       } else if (FloatValue == 0.5) {
    2164             :         ImmReg = R600::HALF;
    2165         357 :       } else if (FloatValue == 1.0) {
    2166             :         ImmReg = R600::ONE;
    2167             :       } else {
    2168         810 :         ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
    2169             :       }
    2170             :     } else {
    2171             :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
    2172       27181 :       uint64_t Value = C->getZExtValue();
    2173       27181 :       if (Value == 0) {
    2174             :         ImmReg = R600::ZERO;
    2175       25442 :       } else if (Value == 1) {
    2176             :         ImmReg = R600::ONE_INT;
    2177             :       } else {
    2178             :         ImmValue = Value;
    2179             :       }
    2180             :     }
    2181             : 
    2182             :     // Check that we aren't already using an immediate.
    2183             :     // XXX: It's possible for an instruction to have more than one
    2184             :     // immediate operand, but this is not supported yet.
    2185             :     if (ImmReg == R600::ALU_LITERAL_X) {
    2186       22839 :       if (!Imm.getNode())
    2187             :         return false;
    2188             :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
    2189             :       assert(C);
    2190       45532 :       if (C->getZExtValue())
    2191             :         return false;
    2192       20522 :       Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
    2193             :     }
    2194       25400 :     Src = DAG.getRegister(ImmReg, MVT::i32);
    2195       25400 :     return true;
    2196             :   }
    2197             :   default:
    2198             :     return false;
    2199             :   }
    2200             : }
    2201             : 
    2202             : /// Fold the instructions after selecting them: try FoldOperand on each source operand of \p Node and, on the first success, return the node built by DAG.getMachineNode from the updated operand list; otherwise return \p Node unchanged.
    2203      164511 : SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
    2204             :                                             SelectionDAG &DAG) const {
    2205      164511 :   const R600InstrInfo *TII = Subtarget->getInstrInfo();
    2206      164511 :   if (!Node->isMachineOpcode()) // Nothing to fold on a non-machine node.
    2207             :     return Node;
    2208             : 
    2209             :   unsigned Opcode = Node->getMachineOpcode();
    2210      164511 :   SDValue FakeOp; // Placeholder bound in place of operand slots that are not offered for folding.
    2211             : 
    2212      164511 :   std::vector<SDValue> Ops(Node->op_begin(), Node->op_end()); // Mutable operand copy; the references handed to FoldOperand point into it.
    2213             : 
    2214      164511 :   if (Opcode == R600::DOT_4) { // DOT_4: src0/src1 each have X, Y, Z and W operands.
    2215             :     int OperandIdx[] = {
    2216         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_X),
    2217         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
    2218         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
    2219         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_W),
    2220         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_X),
    2221         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
    2222         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
    2223         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    2224         148 :         };
    2225             :     int NegIdx[] = {
    2226         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
    2227         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
    2228         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
    2229         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
    2230         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
    2231         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
    2232         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
    2233         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    2234         148 :     };
    2235             :     int AbsIdx[] = {
    2236         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
    2237         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
    2238         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
    2239         148 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
    2240         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
    2241         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
    2242         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
    2243         148 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    2244         148 :     };
    2245        1070 :     for (unsigned i = 0; i < 8; i++) {
    2246        1002 :       if (OperandIdx[i] < 0) // A missing source operand ends the whole attempt, not just this iteration.
    2247          80 :         return Node;
    2248        1002 :       SDValue &Src = Ops[OperandIdx[i] - 1]; // -1: presumably maps the instruction operand index onto Ops, which would have no dst entry -- TODO confirm.
    2249        1002 :       SDValue &Neg = Ops[NegIdx[i] - 1];
    2250        1002 :       SDValue &Abs = Ops[AbsIdx[i] - 1];
    2251        1002 :       bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    2252        1002 :       int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
    2253        1002 :       if (HasDst) // Shift the sel index down by one when the opcode has a dst operand.
    2254        1002 :         SelIdx--;
    2255        1002 :       SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
    2256        1002 :       if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) // FakeOp is passed as Imm: no literal slot is offered for DOT_4.
    2257         160 :         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); // Stop at the first successful fold.
    2258             :     }
    2259      164363 :   } else if (Opcode == R600::REG_SEQUENCE) {
    2260       25207 :     for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { // Odd operand positions only; presumably the value operands of the sequence -- TODO confirm.
    2261       20571 :       SDValue &Src = Ops[i];
    2262       20571 :       if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) // Neg, Abs, Sel and Imm are all the FakeOp placeholder here.
    2263        2646 :         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    2264             :     }
    2265             :   } else {
    2266      158404 :     if (!TII->hasInstrModifiers(Opcode)) // Only opcodes for which hasInstrModifiers() is true are handled below.
    2267      143947 :       return Node;
    2268             :     int OperandIdx[] = {
    2269      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src0),
    2270      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src1),
    2271      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src2)
    2272      107199 :     };
    2273             :     int NegIdx[] = {
    2274      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
    2275      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
    2276      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    2277      107199 :     };
    2278      107199 :     int AbsIdx[] = {
    2279      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
    2280      107199 :       TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
    2281             :       -1 // No abs index is looked up for src2; -1 makes Abs fall back to FakeAbs below.
    2282      107199 :     };
    2283      295091 :     for (unsigned i = 0; i < 3; i++) {
    2284      280634 :       if (OperandIdx[i] < 0) // Stop at the first source operand the opcode does not have.
    2285       92742 :         return Node;
    2286      221533 :       SDValue &Src = Ops[OperandIdx[i] - 1];
    2287      221533 :       SDValue &Neg = Ops[NegIdx[i] - 1]; // NOTE(review): NegIdx[i] is used without a > -1 check -- confirm it is always valid when OperandIdx[i] is.
    2288      221533 :       SDValue FakeAbs; // Per-iteration placeholder for a missing abs operand.
    2289      221533 :       SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
    2290      221533 :       bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    2291      221533 :       int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
    2292      221533 :       int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
    2293      221533 :       if (HasDst) { // Shift both indices down by one when the opcode has a dst operand.
    2294      221533 :         SelIdx--;
    2295      221533 :         ImmIdx--;
    2296             :       }
    2297      221533 :       SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
    2298      221533 :       SDValue &Imm = Ops[ImmIdx]; // NOTE(review): unlike SelIdx, ImmIdx is not checked against -1 -- confirm every opcode reaching here has a literal operand.
    2299      221533 :       if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
    2300       67282 :         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); // Stop at the first successful fold.
    2301             :     }
    2302             :   }
    2303             : 
    2304             :   return Node; // Nothing was folded.
    2305             : }

Generated by: LCOV version 1.13