//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
}

const R600Subtarget *R600TargetLowering::getSubtarget() const {
  return static_cast<const R600Subtarget *>(Subtarget);
}

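// Returns true when the instruction that follows I is the function's RETURN;
// in that case the preceding instruction can carry the "end of program" (EOP)
// bit, see the addImm(isEOP(I)) uses below.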
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr *defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;

  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case AMDGPU::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .add(MI.getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(AMDGPU::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(AMDGPU::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }


  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1.0 and 1.0.
  // Thus we lower them to TRIG(FRACT(x / (2*Pi) + 0.5) - 0.5).
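  // Worked example: for x = 4*Pi, the sequence computes
  //   FRACT(4*Pi * 0.15915494309 + 0.5) - 0.5 = FRACT(2.5) - 0.5 = 0.0,
  // i.e. the argument is range-reduced to the equivalent angle 0 (expressed
  // in turns) before the TRIG node consumes it.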
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= R600Subtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
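// LowerSHLParts expands a 64-bit shift-left whose operand arrives as a
// {Lo, Hi} pair of i32 values. A minimal sketch of the same idea in plain
// C++ (illustration only; the out-of-range sub-shifts below are discarded by
// the selects, exactly as the SELECT_CC nodes do in the DAG):
//
//   static void shl64(uint32_t &Hi, uint32_t &Lo, uint32_t Shift) {
//     uint32_t Overflow = (Lo >> (31 - Shift)) >> 1; // two steps: Shift may be 0
//     uint32_t HiSmall = (Hi << Shift) | Overflow;   // Shift < 32 case
//     uint32_t HiBig = Lo << (Shift - 32);           // Shift >= 32 case
//     Hi = Shift < 32 ? HiSmall : HiBig;
//     Lo = Shift < 32 ? Lo << Shift : 0;
//   }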
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}
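// Implicit kernel parameters (the ngroups/global-size/local-size values
// returned by the r600_read_* intrinsics handled in LowerOperation) live at
// fixed dword offsets in the implicit parameter address space; DwordOffset
// selects which dword to load.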
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUASI.PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
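// Maps a scalarized element index to a register channel plus a pointer
// increment for the given stack width. For example, with StackWidth == 2,
// element 2 lands in channel 0 of the next register (PtrIncr == 1).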
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);

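  // What follows is a read-modify-write sequence: load the 32-bit dword that
  // contains the target byte/short, clear those bits with an inverted mask,
  // OR in the shifted new value, and store the dword back.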
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
        StoreNode->isTruncatingStore()) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of in the combiner to
    // avoid artificial dependencies introduced by RMW
    if (StoreNode->isTruncatingStore()) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUASI.PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}
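// Each CONSTANT_BUFFER_* address space maps to one 4 KB (1 << 12) kcache
// bank starting at base 512, which is what the lookup below encodes.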
// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

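// Frame indexes are lowered to plain constants: getFrameIndexReference yields
// an offset in stack slots, which is scaled by 4 bytes per channel times the
// machine's stack width to obtain a byte offset.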
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUASI.CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
                      VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
        MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
                                        MachineMemOperand::MODereferenceable |
                                        MachineMemOperand::MOInvariant);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
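// First of two swizzle-optimization passes over a BUILD_VECTOR: fold undef,
// 0.0, 1.0 and repeated operands into the SEL_MASK_WRITE/SEL_0/SEL_1/duplicate
// swizzle selects, recording the old->new lane mapping in RemapSwizzle.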
static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128-bit register usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
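// Runs both swizzle passes (compaction, then reorganization) and patches the
// Swz[] selects through the lane mapping each pass produces.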
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break;
  }
1807 
1808  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1809  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1810  case ISD::INSERT_VECTOR_ELT: {
1811  SDValue InVec = N->getOperand(0);
1812  SDValue InVal = N->getOperand(1);
1813  SDValue EltNo = N->getOperand(2);
1814 
1815  // If the inserted element is an UNDEF, just use the input vector.
1816  if (InVal.isUndef())
1817  return InVec;
1818 
1819  EVT VT = InVec.getValueType();
1820 
1821  // If we can't generate a legal BUILD_VECTOR, exit
1823  return SDValue();
1824 
1825  // Check that we know which element is being inserted
1826  if (!isa<ConstantSDNode>(EltNo))
1827  return SDValue();
1828  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1829 
1830  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1831  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1832  // vector elements.
1833  SmallVector<SDValue, 8> Ops;
1834  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1835  Ops.append(InVec.getNode()->op_begin(),
1836  InVec.getNode()->op_end());
1837  } else if (InVec.isUndef()) {
1838  unsigned NElts = VT.getVectorNumElements();
1839  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1840  } else {
1841  return SDValue();
1842  }
1843 
1844  // Insert the element
1845  if (Elt < Ops.size()) {
1846  // All the operands of BUILD_VECTOR must have the same type;
1847  // we enforce that here.
1848  EVT OpVT = Ops[0].getValueType();
1849  if (InVal.getValueType() != OpVT)
1850  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1851  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1852  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1853  Ops[Elt] = InVal;
1854  }
1855 
1856  // Return the new vector
1857  return DAG.getBuildVector(VT, DL, Ops);
1858  }
1859 
1860  // Extract_vec (Build_vector) generated by custom lowering
1861  // also needs to be custom-combined
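 // e.g. (extract_vector_elt (build_vector a, b, c, d), 2) folds to c; the
 // bitcast form folds to (bitcast c) when the cast preserves the element
 // count (illustrative operands).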
1862  case ISD::EXTRACT_VECTOR_ELT: {
1863  SDValue Arg = N->getOperand(0);
1864  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1865  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1866  unsigned Element = Const->getZExtValue();
1867  return Arg->getOperand(Element);
1868  }
1869  }
1870  if (Arg.getOpcode() == ISD::BITCAST &&
1871  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1872  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1873  Arg.getValueType().getVectorNumElements())) {
1874  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1875  unsigned Element = Const->getZExtValue();
1876  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1877  Arg->getOperand(0).getOperand(Element));
1878  }
1879  }
1880  break;
1881  }
1882 
1883  case ISD::SELECT_CC: {
1884  // Try common optimizations
1885  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1886  return Ret;
1887 
1888  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1889  // selectcc x, y, a, b, inv(cc)
1890  //
1891  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1892  // selectcc x, y, a, b, cc
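 // Intuition (hypothetical values): with Inner = (selectcc x, y, a, b, cc),
 // the node (selectcc Inner, b, a, b, setne) asks "did Inner produce a?",
 // which is Inner itself; the seteq form asks the negated question, hence
 // inv(cc).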
1893  SDValue LHS = N->getOperand(0);
1894  if (LHS.getOpcode() != ISD::SELECT_CC) {
1895  return SDValue();
1896  }
1897 
1898  SDValue RHS = N->getOperand(1);
1899  SDValue True = N->getOperand(2);
1900  SDValue False = N->getOperand(3);
1901  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1902 
1903  if (LHS.getOperand(2).getNode() != True.getNode() ||
1904  LHS.getOperand(3).getNode() != False.getNode() ||
1905  RHS.getNode() != False.getNode()) {
1906  return SDValue();
1907  }
1908 
1909  switch (NCC) {
1910  default: return SDValue();
1911  case ISD::SETNE: return LHS;
1912  case ISD::SETEQ: {
1913  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1914  LHSCC = ISD::getSetCCInverse(LHSCC,
1915  LHS.getOperand(0).getValueType().isInteger());
1916  if (DCI.isBeforeLegalizeOps() ||
1917  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1918  return DAG.getSelectCC(DL,
1919  LHS.getOperand(0),
1920  LHS.getOperand(1),
1921  LHS.getOperand(2),
1922  LHS.getOperand(3),
1923  LHSCC);
1924  break;
1925  }
1926  }
1927  return SDValue();
1928  }
1929 
1930  case AMDGPUISD::R600_EXPORT: {
1931  SDValue Arg = N->getOperand(1);
1932  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1933  break;
1934 
1935  SDValue NewArgs[8] = {
1936  N->getOperand(0), // Chain
1937  SDValue(),
1938  N->getOperand(2), // ArrayBase
1939  N->getOperand(3), // Type
1940  N->getOperand(4), // SWZ_X
1941  N->getOperand(5), // SWZ_Y
1942  N->getOperand(6), // SWZ_Z
1943  N->getOperand(7) // SWZ_W
1944  };
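 // NewArgs[1] is a placeholder: OptimizeSwizzle fills it with the (possibly
 // compacted) build_vector and rewrites the four SWZ_* constants at
 // NewArgs[4..7] in place so they keep addressing the same channels.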
1945  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1946  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1947  }
1948  case AMDGPUISD::TEXTURE_FETCH: {
1949  SDValue Arg = N->getOperand(1);
1950  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1951  break;
1952 
1953  SDValue NewArgs[19] = {
1954  N->getOperand(0),
1955  N->getOperand(1),
1956  N->getOperand(2),
1957  N->getOperand(3),
1958  N->getOperand(4),
1959  N->getOperand(5),
1960  N->getOperand(6),
1961  N->getOperand(7),
1962  N->getOperand(8),
1963  N->getOperand(9),
1964  N->getOperand(10),
1965  N->getOperand(11),
1966  N->getOperand(12),
1967  N->getOperand(13),
1968  N->getOperand(14),
1969  N->getOperand(15),
1970  N->getOperand(16),
1971  N->getOperand(17),
1972  N->getOperand(18),
1973  };
1974  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1975  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1976  }
1977  default: break;
1978  }
1979 
1980  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1981 }
1982 
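// Attempt to fold a source operand's producer (an FNEG_R600/FABS_R600
// wrapper, a CONST_COPY, or a MOV_IMM_*) into the consuming instruction's
// operand slots. For a hypothetical source (FNEG_R600 r), Src becomes r and
// the matching neg modifier is set to 1, encoding the negation in the
// instruction word rather than as a separate instruction.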
1983 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1984  SDValue &Src, SDValue &Neg, SDValue &Abs,
1985  SDValue &Sel, SDValue &Imm,
1986  SelectionDAG &DAG) const {
1987  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
1988  if (!Src.isMachineOpcode())
1989  return false;
1990 
1991  switch (Src.getMachineOpcode()) {
1992  case AMDGPU::FNEG_R600:
1993  if (!Neg.getNode())
1994  return false;
1995  Src = Src.getOperand(0);
1996  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1997  return true;
1998  case AMDGPU::FABS_R600:
1999  if (!Abs.getNode())
2000  return false;
2001  Src = Src.getOperand(0);
2002  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2003  return true;
2004  case AMDGPU::CONST_COPY: {
2005  unsigned Opcode = ParentNode->getMachineOpcode();
2006  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2007 
2008  if (!Sel.getNode())
2009  return false;
2010 
2011  SDValue CstOffset = Src.getOperand(0);
2012  if (ParentNode->getValueType(0).isVector())
2013  return false;
2014 
2015  // Gather constant values
2016  int SrcIndices[] = {
2017  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2018  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2019  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2020  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2021  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2022  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2023  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2024  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2025  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2026  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2027  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2028  };
2029  std::vector<unsigned> Consts;
2030  for (int OtherSrcIdx : SrcIndices) {
2031  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2032  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2033  continue;
2034  if (HasDst) {
2035  OtherSrcIdx--;
2036  OtherSelIdx--;
2037  }
2038  if (RegisterSDNode *Reg =
2039  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2040  if (Reg->getReg() == AMDGPU::ALU_CONST) {
2041  ConstantSDNode *Cst
2042  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2043  Consts.push_back(Cst->getZExtValue());
2044  }
2045  }
2046  }
2047 
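 // Add this CONST_COPY's own offset and verify the combined constant set
 // still fits the hardware's kcache read limits (an ALU group can only read
 // a small number of constant channel pairs); bail out instead of folding
 // if it would overflow.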
2048  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2049  Consts.push_back(Cst->getZExtValue());
2050  if (!TII->fitsConstReadLimitations(Consts)) {
2051  return false;
2052  }
2053 
2054  Sel = CstOffset;
2055  Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2056  return true;
2057  }
2058  case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2059  // Check whether the Imm slot is already in use (same check as the MOV_IMM cases below).
2060  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2061  return false;
2062  Imm = Src.getOperand(0);
2063  Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2064  return true;
2065  case AMDGPU::MOV_IMM_I32:
2066  case AMDGPU::MOV_IMM_F32: {
2067  unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2068  uint64_t ImmValue = 0;
2069 
2070  if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2071  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2072  float FloatValue = FPC->getValueAPF().convertToFloat();
2073  if (FloatValue == 0.0) {
2074  ImmReg = AMDGPU::ZERO;
2075  } else if (FloatValue == 0.5) {
2076  ImmReg = AMDGPU::HALF;
2077  } else if (FloatValue == 1.0) {
2078  ImmReg = AMDGPU::ONE;
2079  } else {
2080  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2081  }
2082  } else {
2083  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2084  uint64_t Value = C->getZExtValue();
2085  if (Value == 0) {
2086  ImmReg = AMDGPU::ZERO;
2087  } else if (Value == 1) {
2088  ImmReg = AMDGPU::ONE_INT;
2089  } else {
2090  ImmValue = Value;
2091  }
2092  }
2093 
2094  // Check that we aren't already using an immediate.
2095  // XXX: It's possible for an instruction to have more than one
2096  // immediate operand, but this is not supported yet.
2097  if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2098  if (!Imm.getNode())
2099  return false;
2100  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2101  assert(C);
2102  if (C->getZExtValue())
2103  return false;
2104  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2105  }
2106  Src = DAG.getRegister(ImmReg, MVT::i32);
2107  return true;
2108  }
2109  default:
2110  return false;
2111  }
2112 }
2113 
2114 /// Fold the instructions after selecting them
2115 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2116  SelectionDAG &DAG) const {
2117  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
2118  if (!Node->isMachineOpcode())
2119  return Node;
2120 
2121  unsigned Opcode = Node->getMachineOpcode();
2122  SDValue FakeOp;
2123 
2124  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2125 
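 // DOT_4 carries eight scalar sources (src0_X..src1_W), each with its own
 // neg/abs modifier operands; try folding into every channel and re-emit
 // the node as soon as one operand is successfully folded.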
2126  if (Opcode == AMDGPU::DOT_4) {
2127  int OperandIdx[] = {
2128  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2129  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2130  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2131  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2132  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2133  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2134  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2135  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2136  };
2137  int NegIdx[] = {
2138  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2139  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2140  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2141  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2142  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2143  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2144  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2145  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2146  };
2147  int AbsIdx[] = {
2148  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2149  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2150  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2151  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2152  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2153  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2154  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2155  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2156  };
2157  for (unsigned i = 0; i < 8; i++) {
2158  if (OperandIdx[i] < 0)
2159  return Node;
2160  SDValue &Src = Ops[OperandIdx[i] - 1];
2161  SDValue &Neg = Ops[NegIdx[i] - 1];
2162  SDValue &Abs = Ops[AbsIdx[i] - 1];
2163  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2164  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2165  if (HasDst)
2166  SelIdx--;
2167  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2168  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2169  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2170  }
2171  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2172  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2173  SDValue &Src = Ops[i];
2174  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2175  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2176  }
2177  } else {
2178  if (!TII->hasInstrModifiers(Opcode))
2179  return Node;
2180  int OperandIdx[] = {
2181  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2182  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2183  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2184  };
2185  int NegIdx[] = {
2186  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2187  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2188  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2189  };
2190  int AbsIdx[] = {
2191  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2192  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2193  -1
2194  };
2195  for (unsigned i = 0; i < 3; i++) {
2196  if (OperandIdx[i] < 0)
2197  return Node;
2198  SDValue &Src = Ops[OperandIdx[i] - 1];
2199  SDValue &Neg = Ops[NegIdx[i] - 1];
2200  SDValue FakeAbs;
2201  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2202  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2203  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2204  int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2205  if (HasDst) {
2206  SelIdx--;
2207  ImmIdx--;
2208  }
2209  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2210  SDValue &Imm = Ops[ImmIdx];
2211  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2212  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2213  }
2214  }
2215 
2216  return Node;
2217 }